about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-10-09 20:08:52 +0000
committerbors <bors@rust-lang.org>2023-10-09 20:08:52 +0000
commitaaba0a5806a963453a712c70d84909010dd2de50 (patch)
tree67f0dd870ba4046172113ef98c86d2589a8b5d4d
parentec1db97976f2757ff6d3960746d3688188b72523 (diff)
parent700bc97906d51df0c164898c01081bad43219038 (diff)
downloadrust-aaba0a5806a963453a712c70d84909010dd2de50.tar.gz
rust-aaba0a5806a963453a712c70d84909010dd2de50.zip
Auto merge of #3101 - eduardosm:x86-aes-intrinsics, r=RalfJung
Implement `llvm.x86.aesni.*` intrinsics
-rw-r--r--src/tools/miri/Cargo.lock72
-rw-r--r--src/tools/miri/Cargo.toml1
-rw-r--r--src/tools/miri/src/shims/x86/aesni.rs168
-rw-r--r--src/tools/miri/src/shims/x86/mod.rs7
-rw-r--r--src/tools/miri/tests/pass/intrinsics-x86-aes-vaes.rs291
5 files changed, 539 insertions, 0 deletions
diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock
index e654932255a..09f58a38dd4 100644
--- a/src/tools/miri/Cargo.lock
+++ b/src/tools/miri/Cargo.lock
@@ -18,6 +18,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
 [[package]]
+name = "aes"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2"
+dependencies = [
+ "cfg-if",
+ "cipher",
+ "cpufeatures",
+]
+
+[[package]]
 name = "aho-corasick"
 version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -143,6 +154,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
 [[package]]
+name = "cipher"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
+dependencies = [
+ "crypto-common",
+ "inout",
+]
+
+[[package]]
 name = "color-eyre"
 version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -200,6 +221,15 @@ dependencies = [
 ]
 
 [[package]]
+name = "cpufeatures"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
+dependencies = [
+ "libc",
+]
+
+[[package]]
 name = "crossbeam-channel"
 version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -219,6 +249,16 @@ dependencies = [
 ]
 
 [[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
 name = "ctrlc"
 version = "3.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -285,6 +325,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
 
 [[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
 name = "getrandom"
 version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -333,6 +383,15 @@ dependencies = [
 ]
 
 [[package]]
+name = "inout"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
 name = "instant"
 version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -469,6 +528,7 @@ dependencies = [
 name = "miri"
 version = "0.1.0"
 dependencies = [
+ "aes",
  "colored",
  "ctrlc",
  "env_logger",
@@ -910,6 +970,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "typenum"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
+
+[[package]]
 name = "ui_test"
 version = "0.21.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -955,6 +1021,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
 
 [[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
 name = "wasi"
 version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml
index c911a153c13..f8e507a11b0 100644
--- a/src/tools/miri/Cargo.toml
+++ b/src/tools/miri/Cargo.toml
@@ -23,6 +23,7 @@ env_logger = "0.10"
 log = "0.4"
 rand = "0.8"
 smallvec = "1.7"
+aes = { version = "0.8.3", features = ["hazmat"] }
 
 measureme = "10.0.0"
 ctrlc = "3.2.5"
diff --git a/src/tools/miri/src/shims/x86/aesni.rs b/src/tools/miri/src/shims/x86/aesni.rs
new file mode 100644
index 00000000000..aef930595b2
--- /dev/null
+++ b/src/tools/miri/src/shims/x86/aesni.rs
@@ -0,0 +1,168 @@
+use rustc_middle::ty::layout::LayoutOf as _;
+use rustc_middle::ty::Ty;
+use rustc_span::Symbol;
+use rustc_target::spec::abi::Abi;
+
+use crate::*;
+use shims::foreign_items::EmulateForeignItemResult;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
+    fn emulate_x86_aesni_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx, Provenance>],
+        dest: &PlaceTy<'tcx, Provenance>,
+    ) -> InterpResult<'tcx, EmulateForeignItemResult> {
+        let this = self.eval_context_mut();
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.aesni.").unwrap();
+
+        match unprefixed_name {
+            // Used to implement the _mm_aesdec_si128, _mm256_aesdec_epi128
+            // and _mm512_aesdec_epi128 functions.
+            // Performs one round of an AES decryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128
+            "aesdec" | "aesdec.256" | "aesdec.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let key = aes::Block::from(key.to_le_bytes());
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::equiv_inv_cipher_round` documentation states that
+                    // it performs the same operation as the x86 aesdec instruction.
+                    aes::hazmat::equiv_inv_cipher_round(&mut state, &key);
+                    u128::from_le_bytes(state.into())
+                })?;
+            }
+            // Used to implement the _mm_aesdeclast_si128, _mm256_aesdeclast_epi128
+            // and _mm512_aesdeclast_epi128 functions.
+            // Performs last round of an AES decryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128
+            "aesdeclast" | "aesdeclast.256" | "aesdeclast.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::equiv_inv_cipher_round` does the following operations:
+                    // state = InvShiftRows(state)
+                    // state = InvSubBytes(state)
+                    // state = InvMixColumns(state)
+                    // state = state ^ key
+                    // But we need to skip the InvMixColumns.
+                    // First, use a zeroed key to skip the XOR.
+                    aes::hazmat::equiv_inv_cipher_round(&mut state, &aes::Block::from([0; 16]));
+                    // Then, undo the InvMixColumns with MixColumns.
+                    aes::hazmat::mix_columns(&mut state);
+                    // Finally, do the XOR.
+                    u128::from_le_bytes(state.into()) ^ key
+                })?;
+            }
+            // Used to implement the _mm_aesenc_si128, _mm256_aesenc_epi128
+            // and _mm512_aesenc_epi128 functions.
+            // Performs one round of an AES encryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128
+            "aesenc" | "aesenc.256" | "aesenc.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let key = aes::Block::from(key.to_le_bytes());
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::cipher_round` documentation states that
+                    // it performs the same operation as the x86 aesenc instruction.
+                    aes::hazmat::cipher_round(&mut state, &key);
+                    u128::from_le_bytes(state.into())
+                })?;
+            }
+            // Used to implement the _mm_aesenclast_si128, _mm256_aesenclast_epi128
+            // and _mm512_aesenclast_epi128 functions.
+            // Performs last round of an AES encryption on each 128-bit word of
+            // `state` with the corresponding 128-bit key of `key`.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128
+            "aesenclast" | "aesenclast.256" | "aesenclast.512" => {
+                let [state, key] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                aes_round(this, state, key, dest, |state, key| {
+                    let mut state = aes::Block::from(state.to_le_bytes());
+                    // `aes::hazmat::cipher_round` does the following operations:
+                    // state = ShiftRows(state)
+                    // state = SubBytes(state)
+                    // state = MixColumns(state)
+                    // state = state ^ key
+                    // But we need to skip the MixColumns.
+                    // First, use a zeroed key to skip the XOR.
+                    aes::hazmat::cipher_round(&mut state, &aes::Block::from([0; 16]));
+                    // Then, undo the MixColumns with InvMixColumns.
+                    aes::hazmat::inv_mix_columns(&mut state);
+                    // Finally, do the XOR.
+                    u128::from_le_bytes(state.into()) ^ key
+                })?;
+            }
+            // Used to implement the _mm_aesimc_si128 function.
+            // Performs the AES InvMixColumns operation on `op`
+            "aesimc" => {
+                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+
+                // Transmute to `u128`
+                let op = op.transmute(this.machine.layouts.u128, this)?;
+                let dest = dest.transmute(this.machine.layouts.u128, this)?;
+
+                let state = this.read_scalar(&op)?.to_u128()?;
+                let mut state = aes::Block::from(state.to_le_bytes());
+                aes::hazmat::inv_mix_columns(&mut state);
+
+                this.write_scalar(Scalar::from_u128(u128::from_le_bytes(state.into())), &dest)?;
+            }
+            // TODO: Implement the `llvm.x86.aesni.aeskeygenassist` when possible
+            // with an external crate.
+            _ => return Ok(EmulateForeignItemResult::NotSupported),
+        }
+        Ok(EmulateForeignItemResult::NeedsJumping)
+    }
+}
+
+// Performs an AES round (given by `f`) on each 128-bit word of
+// `state` with the corresponding 128-bit key of `key`.
+fn aes_round<'tcx>(
+    this: &mut crate::MiriInterpCx<'_, 'tcx>,
+    state: &OpTy<'tcx, Provenance>,
+    key: &OpTy<'tcx, Provenance>,
+    dest: &PlaceTy<'tcx, Provenance>,
+    f: impl Fn(u128, u128) -> u128,
+) -> InterpResult<'tcx, ()> {
+    assert_eq!(dest.layout.size, state.layout.size);
+    assert_eq!(dest.layout.size, key.layout.size);
+
+    // Transmute arguments to arrays of `u128`.
+    assert_eq!(dest.layout.size.bytes() % 16, 0);
+    let len = dest.layout.size.bytes() / 16;
+
+    let u128_array_layout =
+        this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, len))?;
+
+    let state = state.transmute(u128_array_layout, this)?;
+    let key = key.transmute(u128_array_layout, this)?;
+    let dest = dest.transmute(u128_array_layout, this)?;
+
+    for i in 0..len {
+        let state = this.read_scalar(&this.project_index(&state, i)?)?.to_u128()?;
+        let key = this.read_scalar(&this.project_index(&key, i)?)?.to_u128()?;
+        let dest = this.project_index(&dest, i)?;
+
+        let res = f(state, key);
+
+        this.write_scalar(Scalar::from_u128(res), &dest)?;
+    }
+
+    Ok(())
+}
diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs
index 53a4a1ef28a..394c955e4c8 100644
--- a/src/tools/miri/src/shims/x86/mod.rs
+++ b/src/tools/miri/src/shims/x86/mod.rs
@@ -7,6 +7,7 @@ use crate::*;
 use helpers::bool_to_simd_element;
 use shims::foreign_items::EmulateForeignItemResult;
 
+mod aesni;
 mod sse;
 mod sse2;
 mod sse3;
@@ -100,6 +101,12 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
                     this, link_name, abi, args, dest,
                 );
             }
+            name if name.starts_with("aesni.") => {
+                return aesni::EvalContextExt::emulate_x86_aesni_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
+
             _ => return Ok(EmulateForeignItemResult::NotSupported),
         }
         Ok(EmulateForeignItemResult::NeedsJumping)
diff --git a/src/tools/miri/tests/pass/intrinsics-x86-aes-vaes.rs b/src/tools/miri/tests/pass/intrinsics-x86-aes-vaes.rs
new file mode 100644
index 00000000000..090b1db0af0
--- /dev/null
+++ b/src/tools/miri/tests/pass/intrinsics-x86-aes-vaes.rs
@@ -0,0 +1,291 @@
+// Ignore everything except x86 and x86_64
+// Any additional target are added to CI should be ignored here
+// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
+//@ignore-target-aarch64
+//@ignore-target-arm
+//@ignore-target-avr
+//@ignore-target-s390x
+//@ignore-target-thumbv7em
+//@ignore-target-wasm32
+//@compile-flags: -C target-feature=+aes,+vaes,+avx512f
+
+#![feature(avx512_target_feature, stdsimd)]
+
+use core::mem::transmute;
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+
+fn main() {
+    assert!(is_x86_feature_detected!("aes"));
+    assert!(is_x86_feature_detected!("vaes"));
+    assert!(is_x86_feature_detected!("avx512f"));
+
+    unsafe {
+        test_aes();
+        test_vaes();
+    }
+}
+
+// The constants in the tests below are just bit patterns. They should not
+// be interpreted as integers; signedness does not make sense for them, but
+// __m128i happens to be defined in terms of signed integers.
+#[allow(overflowing_literals)]
+#[target_feature(enable = "aes")]
+unsafe fn test_aes() {
+    // Mostly copied from library/stdarch/crates/core_arch/src/x86/aes.rs
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesdec_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
+        let r = _mm_aesdec_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesdec_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesdeclast_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
+        let r = _mm_aesdeclast_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesdeclast_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesenc_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
+        let r = _mm_aesenc_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesenc_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesenclast_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
+        let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
+        let r = _mm_aesenclast_si128(a, k);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesenclast_si128();
+
+    #[target_feature(enable = "aes")]
+    unsafe fn test_mm_aesimc_si128() {
+        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx.
+        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
+        let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055);
+        let r = _mm_aesimc_si128(a);
+        assert_eq_m128i(r, e);
+    }
+    test_mm_aesimc_si128();
+}
+
+// The constants in the tests below are just bit patterns. They should not
+// be interpreted as integers; signedness does not make sense for them, but
+// __m128i happens to be defined in terms of signed integers.
+#[allow(overflowing_literals)]
+#[target_feature(enable = "vaes,avx512f")]
+unsafe fn test_vaes() {
+    #[target_feature(enable = "avx")]
+    unsafe fn get_a256() -> __m256i {
+        // Constants are random
+        _mm256_set_epi64x(
+            0xb89f43a558d3cd51,
+            0x57b3e81e369bd603,
+            0xf177a1a626933fd6,
+            0x50d8adbed1a2f9d7,
+        )
+    }
+    #[target_feature(enable = "avx")]
+    unsafe fn get_k256() -> __m256i {
+        // Constants are random
+        _mm256_set_epi64x(
+            0x503ff704588b5627,
+            0xe23d882ed9c3c146,
+            0x2785e5b670155b3c,
+            0xa750718e183549ff,
+        )
+    }
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesdec_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesdec_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesdec_epi128();
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesdeclast_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesdeclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesdeclast_epi128();
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesenc_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesenc_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesenc_epi128();
+
+    #[target_feature(enable = "vaes")]
+    unsafe fn test_mm256_aesenclast_epi128() {
+        let a = get_a256();
+        let k = get_k256();
+        let r = _mm256_aesenclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 2] = transmute(a);
+        let k: [u128; 2] = transmute(k);
+        let r: [u128; 2] = transmute(r);
+        for i in 0..2 {
+            let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm256_aesenclast_epi128();
+
+    #[target_feature(enable = "avx512f")]
+    unsafe fn get_a512() -> __m512i {
+        // Constants are random
+        _mm512_set_epi64(
+            0xb89f43a558d3cd51,
+            0x57b3e81e369bd603,
+            0xf177a1a626933fd6,
+            0x50d8adbed1a2f9d7,
+            0xfbfee3116629db78,
+            0x6aef4a91f2ad50f4,
+            0x4258bb51ff1d476d,
+            0x31da65761c8016cf,
+        )
+    }
+    #[target_feature(enable = "avx512f")]
+    unsafe fn get_k512() -> __m512i {
+        // Constants are random
+        _mm512_set_epi64(
+            0x503ff704588b5627,
+            0xe23d882ed9c3c146,
+            0x2785e5b670155b3c,
+            0xa750718e183549ff,
+            0xdfb408830a65d3d9,
+            0x0de3d92adac81b0a,
+            0xed2741fe12877cae,
+            0x3251ddb5404e0974,
+        )
+    }
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesdec_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesdec_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesdec_epi128();
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesdeclast_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesdeclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesdeclast_epi128();
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesenc_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesenc_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesenc_epi128();
+
+    #[target_feature(enable = "vaes,avx512f")]
+    unsafe fn test_mm512_aesenclast_epi128() {
+        let a = get_a512();
+        let k = get_k512();
+        let r = _mm512_aesenclast_epi128(a, k);
+
+        // Check results.
+        let a: [u128; 4] = transmute(a);
+        let k: [u128; 4] = transmute(k);
+        let r: [u128; 4] = transmute(r);
+        for i in 0..4 {
+            let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i])));
+            assert_eq!(r[i], e);
+        }
+    }
+    test_mm512_aesenclast_epi128();
+}
+
+#[track_caller]
+#[target_feature(enable = "sse2")]
+unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
+    assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
+}