about summary refs log tree commit diff
path: root/library/stdarch/crates/intrinsic-test
diff options
context:
space:
mode:
authorJames McGregor <james.mcgregor2@arm.com>2022-06-21 18:42:39 +0100
committerAmanieu d'Antras <amanieu@gmail.com>2022-08-22 23:46:30 +0200
commit893bbdd7174bffbb3633528d2aec4715e268ebf0 (patch)
tree8c0a9034e5d2d7b71a1fb88000e2df8c1159d0af /library/stdarch/crates/intrinsic-test
parente79701c56ea58a9a9f0ed4cf692573809ef27c54 (diff)
downloadrust-893bbdd7174bffbb3633528d2aec4715e268ebf0.tar.gz
rust-893bbdd7174bffbb3633528d2aec4715e268ebf0.zip
Use load intrinsic and loop for intrinsic-test programs. Add --release flag back to intrinsic-test programs.
Diffstat (limited to 'library/stdarch/crates/intrinsic-test')
-rw-r--r--library/stdarch/crates/intrinsic-test/missing_aarch64.txt14
-rw-r--r--library/stdarch/crates/intrinsic-test/src/argument.rs107
-rw-r--r--library/stdarch/crates/intrinsic-test/src/intrinsic.rs65
-rw-r--r--library/stdarch/crates/intrinsic-test/src/main.rs54
-rw-r--r--library/stdarch/crates/intrinsic-test/src/types.rs44
-rw-r--r--library/stdarch/crates/intrinsic-test/src/values.rs9
6 files changed, 184 insertions, 109 deletions
diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
index 56ec274b5a7..93fc126e5ce 100644
--- a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
+++ b/library/stdarch/crates/intrinsic-test/missing_aarch64.txt
@@ -67,20 +67,6 @@ vrnd64xq_f64
 vrnd64z_f64
 vrnd64zq_f64
 
-# Takes too long to compile tests
-vcopyq_laneq_u8
-vcopyq_laneq_s8
-vcopyq_laneq_p8
-vcopyq_lane_u8
-vcopyq_lane_s8
-vcopyq_lane_p8
-vcopy_laneq_u8
-vcopy_laneq_s8
-vcopy_laneq_p8
-vcopy_lane_u8
-vcopy_lane_s8
-vcopy_lane_p8
-
 # QEMU 6.0 doesn't support these instructions
 vmmlaq_s32
 vmmlaq_u32
diff --git a/library/stdarch/crates/intrinsic-test/src/argument.rs b/library/stdarch/crates/intrinsic-test/src/argument.rs
index f4cb77992a7..798854c0390 100644
--- a/library/stdarch/crates/intrinsic-test/src/argument.rs
+++ b/library/stdarch/crates/intrinsic-test/src/argument.rs
@@ -1,6 +1,6 @@
 use std::ops::Range;
 
-use crate::types::IntrinsicType;
+use crate::types::{IntrinsicType, TypeKind};
 use crate::Language;
 
 /// An argument for the intrinsic.
@@ -90,49 +90,108 @@ impl ArgumentList {
             .join(", ")
     }
 
-    /// Creates a line that initializes this argument for C code.
-    /// e.g. `int32x2_t a = { 0x1, 0x2 };`
-    pub fn init_random_values_c(&self, pass: usize) -> String {
+    /// Creates a line for each argument that initializes an array for C from which `loads` argument
+    /// values can be loaded  as a sliding window.
+    /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2.
+    pub fn gen_arglists_c(&self, loads: u32) -> String {
         self.iter()
             .filter_map(|arg| {
                 (!arg.has_constraint()).then(|| {
                     format!(
-                        "{ty} {name} = {{ {values} }};",
-                        ty = arg.to_c_type(),
+                        "const {ty} {name}_vals[] = {{ {values} }};",
+                        ty = arg.ty.c_scalar_type(),
                         name = arg.name,
-                        values = arg.ty.populate_random(pass, &Language::C)
+                        values = arg.ty.populate_random(loads, &Language::C)
                     )
                 })
             })
             .collect::<Vec<_>>()
-            .join("\n    ")
+            .join("\n")
     }
 
-    /// Creates a line that initializes this argument for Rust code.
-    /// e.g. `let a = transmute([0x1, 0x2]);`
-    pub fn init_random_values_rust(&self, pass: usize) -> String {
+    /// Creates a line for each argument that initializes an array for Rust from which `loads` argument
+    /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20]  = [...];`
+    pub fn gen_arglists_rust(&self, loads: u32) -> String {
         self.iter()
             .filter_map(|arg| {
                 (!arg.has_constraint()).then(|| {
-                    if arg.is_simd() {
-                        format!(
-                            "let {name} = ::std::mem::transmute([{values}]);",
-                            name = arg.name,
-                            values = arg.ty.populate_random(pass, &Language::Rust),
-                        )
-                    } else {
-                        format!(
-                            "let {name} = {value};",
-                            name = arg.name,
-                            value = arg.ty.populate_random(pass, &Language::Rust)
-                        )
-                    }
+                    format!(
+                        "const {upper_name}_VALS: [{ty}; {load_size}] = unsafe{{ [{values}] }};",
+                        upper_name = arg.name.to_uppercase(),
+                        ty = arg.ty.rust_scalar_type(),
+                        load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
+                        values = arg.ty.populate_random(loads, &Language::Rust)
+                    )
+                })
+            })
+            .collect::<Vec<_>>()
+            .join("\n")
+    }
+
+    /// Creates a line for each argument that initalizes the argument from an array [arg]_vals at
+    /// an offset i using a load intrinsic, in C.
+    /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);`
+    pub fn load_values_c(&self, p64_armv7_workaround: bool) -> String {
+        self.iter()
+            .filter_map(|arg| {
+                // The ACLE doesn't support 64-bit polynomial loads on Armv7
+                // This and the cast are a workaround for this
+                let armv7_p64 = if let TypeKind::Poly = arg.ty.kind() {
+                    p64_armv7_workaround
+                } else {
+                    false
+                };
+
+                (!arg.has_constraint()).then(|| {
+                    format!(
+                        "{ty} {name} = {open_cast}{load}(&{name}_vals[i]){close_cast};",
+                        ty = arg.to_c_type(),
+                        name = arg.name,
+                        load = if arg.is_simd() {
+                            arg.ty.get_load_function(p64_armv7_workaround)
+                        } else {
+                            "*".to_string()
+                        },
+                        open_cast = if armv7_p64 {
+                            format!("cast<{}>(", arg.to_c_type())
+                        } else {
+                            "".to_string()
+                        },
+                        close_cast = if armv7_p64 {
+                            ")".to_string()
+                        } else {
+                            "".to_string()
+                        }
+                    )
                 })
             })
             .collect::<Vec<_>>()
             .join("\n        ")
     }
 
+    /// Creates a line for each argument that initalizes the argument from array [ARG]_VALS at
+    /// an offset i using a load intrinsic, in Rust.
+    /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));`
+    pub fn load_values_rust(&self) -> String {
+        self.iter()
+            .filter_map(|arg| {
+                (!arg.has_constraint()).then(|| {
+                    format!(
+                        "let {name} = {load}({upper_name}_VALS.as_ptr().offset(i));",
+                        name = arg.name,
+                        upper_name = arg.name.to_uppercase(),
+                        load = if arg.is_simd() {
+                            arg.ty.get_load_function(false)
+                        } else {
+                            "*".to_string()
+                        },
+                    )
+                })
+            })
+            .collect::<Vec<_>>()
+            .join("\n            ")
+    }
+
     pub fn iter(&self) -> std::slice::Iter<'_, Argument> {
         self.args.iter()
     }
diff --git a/library/stdarch/crates/intrinsic-test/src/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/intrinsic.rs
index 2b7130440f6..e0645a36bed 100644
--- a/library/stdarch/crates/intrinsic-test/src/intrinsic.rs
+++ b/library/stdarch/crates/intrinsic-test/src/intrinsic.rs
@@ -20,8 +20,9 @@ pub struct Intrinsic {
 
 impl Intrinsic {
     /// Generates a std::cout for the intrinsics results that will match the
-    /// rust debug output format for the return type.
-    pub fn print_result_c(&self, index: usize, additional: &str) -> String {
+    /// rust debug output format for the return type. The generated line assumes
+    /// there is an int i in scope which is the current pass number.
+    pub fn print_result_c(&self, additional: &str) -> String {
         let lanes = if self.results.num_vectors() > 1 {
             (0..self.results.num_vectors())
                 .map(|vector| {
@@ -72,7 +73,7 @@ impl Intrinsic {
         };
 
         format!(
-            r#"std::cout << "Result {additional}-{idx}: {ty}" << std::fixed << std::setprecision(150) <<  {lanes} << "{close}" << std::endl;"#,
+            r#"std::cout << "Result {additional}-" << i+1 << ": {ty}" << std::fixed << std::setprecision(150) <<  {lanes} << "{close}" << std::endl;"#,
             ty = if self.results.is_simd() {
                 format!("{}(", self.results.c_type())
             } else {
@@ -81,11 +82,31 @@ impl Intrinsic {
             close = if self.results.is_simd() { ")" } else { "" },
             lanes = lanes,
             additional = additional,
-            idx = index,
         )
     }
 
-    pub fn generate_pass_rust(&self, index: usize, additional: &str) -> String {
+    pub fn generate_loop_c(
+        &self,
+        additional: &str,
+        passes: u32,
+        p64_armv7_workaround: bool,
+    ) -> String {
+        format!(
+            r#"  {{
+    for (int i=0; i<{passes}; i++) {{
+        {loaded_args}
+        auto __return_value = {intrinsic_call}({args});
+        {print_result}
+    }}
+  }}"#,
+            loaded_args = self.arguments.load_values_c(p64_armv7_workaround),
+            intrinsic_call = self.name,
+            args = self.arguments.as_call_param_c(),
+            print_result = self.print_result_c(additional)
+        )
+    }
+
+    pub fn generate_loop_rust(&self, additional: &str, passes: u32) -> String {
         let constraints = self.arguments.as_constraint_parameters_rust();
         let constraints = if !constraints.is_empty() {
             format!("::<{}>", constraints)
@@ -94,32 +115,20 @@ impl Intrinsic {
         };
 
         format!(
-            r#"
-    unsafe {{
-        {initialized_args}
-        let res = {intrinsic_call}{const}({args});
-        println!("Result {additional}-{idx}: {{:.150?}}", res);
-    }}"#,
-            initialized_args = self.arguments.init_random_values_rust(index),
-            intrinsic_call = self.name,
-            args = self.arguments.as_call_param_rust(),
-            additional = additional,
-            idx = index,
-            const = constraints,
-        )
-    }
-
-    pub fn generate_pass_c(&self, index: usize, additional: &str) -> String {
-        format!(
             r#"  {{
-    {initialized_args}
-    auto __return_value = {intrinsic_call}({args});
-    {print_result}
+    for i in 0..{passes} {{
+        unsafe {{
+            {loaded_args}
+            let __return_value = {intrinsic_call}{const}({args});
+            println!("Result {additional}-{{}}: {{:.150?}}", i+1, __return_value);
+        }}
+    }}
   }}"#,
-            initialized_args = self.arguments.init_random_values_c(index),
+            loaded_args = self.arguments.load_values_rust(),
             intrinsic_call = self.name,
-            args = self.arguments.as_call_param_c(),
-            print_result = self.print_result_c(index, additional)
+            const = constraints,
+            args = self.arguments.as_call_param_rust(),
+            additional = additional,
         )
     }
 }
diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs
index 1b58da2fd7e..43f2df08bb9 100644
--- a/library/stdarch/crates/intrinsic-test/src/main.rs
+++ b/library/stdarch/crates/intrinsic-test/src/main.rs
@@ -23,13 +23,21 @@ mod intrinsic;
 mod types;
 mod values;
 
+// The number of times each intrinsic will be called.
+const PASSES: u32 = 20;
+
 #[derive(Debug, PartialEq)]
 pub enum Language {
     Rust,
     C,
 }
 
-fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) -> String {
+fn gen_code_c(
+    intrinsic: &Intrinsic,
+    constraints: &[&Argument],
+    name: String,
+    p64_armv7_workaround: bool,
+) -> String {
     if let Some((current, constraints)) = constraints.split_last() {
         let range = current
             .constraints
@@ -47,19 +55,25 @@ fn gen_code_c(intrinsic: &Intrinsic, constraints: &[&Argument], name: String) ->
                     name = current.name,
                     ty = current.ty.c_type(),
                     val = i,
-                    pass = gen_code_c(intrinsic, constraints, format!("{}-{}", name, i))
+                    pass = gen_code_c(
+                        intrinsic,
+                        constraints,
+                        format!("{}-{}", name, i),
+                        p64_armv7_workaround
+                    )
                 )
             })
             .collect()
     } else {
-        (1..20)
-            .map(|idx| intrinsic.generate_pass_c(idx, &name))
-            .collect::<Vec<_>>()
-            .join("\n")
+        intrinsic.generate_loop_c(&name, PASSES, p64_armv7_workaround)
     }
 }
 
-fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
+fn generate_c_program(
+    header_files: &[&str],
+    intrinsic: &Intrinsic,
+    p64_armv7_workaround: bool,
+) -> String {
     let constraints = intrinsic
         .arguments
         .iter()
@@ -75,7 +89,7 @@ fn generate_c_program(header_files: &[&str], intrinsic: &Intrinsic) -> String {
 
 template<typename T1, typename T2> T1 cast(T2 x) {{
   static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same");
-  T1 ret = 0;
+  T1 ret{{}};
   memcpy(&ret, &x, sizeof(T1));
   return ret;
 }}
@@ -95,6 +109,8 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {{
 }}
 #endif
 
+{arglists}
+
 int main(int argc, char **argv) {{
 {passes}
     return 0;
@@ -104,7 +120,13 @@ int main(int argc, char **argv) {{
             .map(|header| format!("#include <{}>", header))
             .collect::<Vec<_>>()
             .join("\n"),
-        passes = gen_code_c(intrinsic, constraints.as_slice(), Default::default()),
+        arglists = intrinsic.arguments.gen_arglists_c(PASSES),
+        passes = gen_code_c(
+            intrinsic,
+            constraints.as_slice(),
+            Default::default(),
+            p64_armv7_workaround
+        ),
     )
 }
 
@@ -131,10 +153,7 @@ fn gen_code_rust(intrinsic: &Intrinsic, constraints: &[&Argument], name: String)
             })
             .collect()
     } else {
-        (1..20)
-            .map(|idx| intrinsic.generate_pass_rust(idx, &name))
-            .collect::<Vec<_>>()
-            .join("\n")
+        intrinsic.generate_loop_rust(&name, PASSES)
     }
 }
 
@@ -153,11 +172,14 @@ fn generate_rust_program(intrinsic: &Intrinsic, a32: bool) -> String {
 #![allow(non_upper_case_globals)]
 use core_arch::arch::{target_arch}::*;
 
+{arglists}
+
 fn main() {{
 {passes}
 }}
 "#,
         target_arch = if a32 { "arm" } else { "aarch64" },
+        arglists = intrinsic.arguments.gen_arglists_rust(PASSES),
         passes = gen_code_rust(intrinsic, &constraints, Default::default())
     )
 }
@@ -203,7 +225,7 @@ fn build_c(intrinsics: &Vec<Intrinsic>, compiler: &str, a32: bool) -> bool {
             let c_filename = format!(r#"c_programs/{}.cpp"#, i.name);
             let mut file = File::create(&c_filename).unwrap();
 
-            let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i);
+            let c_code = generate_c_program(&["arm_neon.h", "arm_acle.h"], &i, a32);
             file.write_all(c_code.into_bytes().as_slice()).unwrap();
             compile_c(&c_filename, &i, compiler, a32)
         })
@@ -259,7 +281,7 @@ path = "{intrinsic}/main.rs""#,
         .current_dir("rust_programs")
         .arg("-c")
         .arg(format!(
-            "cargo {toolchain} build --target {target}",
+            "cargo {toolchain} build --target {target} --release",
             toolchain = toolchain,
             target = if a32 {
                 "armv7-unknown-linux-gnueabihf"
@@ -407,7 +429,7 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a
                 .current_dir("rust_programs")
                 .arg("-c")
                 .arg(format!(
-                    "cargo {toolchain} run --target {target} --bin {intrinsic}",
+                    "cargo {toolchain} run --target {target} --bin {intrinsic} --release",
                     intrinsic = intrinsic.name,
                     toolchain = toolchain,
                     target = if a32 {
diff --git a/library/stdarch/crates/intrinsic-test/src/types.rs b/library/stdarch/crates/intrinsic-test/src/types.rs
index e51e6164964..dd23586e76a 100644
--- a/library/stdarch/crates/intrinsic-test/src/types.rs
+++ b/library/stdarch/crates/intrinsic-test/src/types.rs
@@ -1,7 +1,7 @@
 use std::fmt;
 use std::str::FromStr;
 
-use crate::values::values_for_pass;
+use crate::values::value_for_array;
 use crate::Language;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -160,8 +160,7 @@ impl IntrinsicType {
         }
     }
 
-    #[allow(unused)]
-    fn c_scalar_type(&self) -> String {
+    pub fn c_scalar_type(&self) -> String {
         format!(
             "{prefix}{bits}_t",
             prefix = self.kind().c_prefix(),
@@ -169,7 +168,7 @@ impl IntrinsicType {
         )
     }
 
-    fn rust_scalar_type(&self) -> String {
+    pub fn rust_scalar_type(&self) -> String {
         format!(
             "{prefix}{bits}",
             prefix = self.kind().rust_prefix(),
@@ -289,18 +288,19 @@ impl IntrinsicType {
         }
     }
 
-    /// Generates a comma list of values that can be used to initialize an
-    /// argument for the intrinsic call.
+    /// Generates a comma list of values that can be used to initialize the array that
+    /// an argument for the intrinsic call is loaded from.
     /// This is determistic based on the pass number.
     ///
-    /// * `pass`: The pass index, i.e. the iteration index for the call to an intrinsic
+    /// * `loads`: The number of values that need to be loaded from the argument array
+    /// * e.g for argument type uint32x2, loads=2 results in a string representing 4 32-bit values
     ///
     /// Returns a string such as
     /// * `0x1, 0x7F, 0xFF` if `language` is `Language::C`
     /// * `0x1 as _, 0x7F as _, 0xFF as _` if `language` is `Language::Rust`
-    pub fn populate_random(&self, pass: usize, language: &Language) -> String {
+    pub fn populate_random(&self, loads: u32, language: &Language) -> String {
         match self {
-            IntrinsicType::Ptr { child, .. } => child.populate_random(pass, language),
+            IntrinsicType::Ptr { child, .. } => child.populate_random(loads, language),
             IntrinsicType::Type {
                 bit_len: Some(bit_len),
                 kind,
@@ -308,11 +308,11 @@ impl IntrinsicType {
                 vec_len,
                 ..
             } if kind == &TypeKind::Int || kind == &TypeKind::UInt || kind == &TypeKind::Poly => (0
-                ..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+                ..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
                 .map(|i| {
                     format!(
                         "{}{}",
-                        values_for_pass(*bit_len, i, pass),
+                        value_for_array(*bit_len, i),
                         match language {
                             &Language::Rust => format!(" as {ty} ", ty = self.rust_scalar_type()),
                             &Language::C => String::from(""),
@@ -327,15 +327,15 @@ impl IntrinsicType {
                 simd_len,
                 vec_len,
                 ..
-            } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+            } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
                 .map(|i| {
                     format!(
                         "{}({})",
                         match language {
-                            &Language::Rust => "f32::from_bits",
+                            &Language::Rust => "std::mem::transmute",
                             &Language::C => "cast<float, uint32_t>",
                         },
-                        values_for_pass(32, i, pass),
+                        value_for_array(32, i),
                     )
                 })
                 .collect::<Vec<_>>()
@@ -346,15 +346,15 @@ impl IntrinsicType {
                 simd_len,
                 vec_len,
                 ..
-            } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1)))
+            } => (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1))
                 .map(|i| {
                     format!(
                         "{}({}{})",
                         match language {
-                            &Language::Rust => "f64::from_bits",
+                            &Language::Rust => "std::mem::transmute",
                             &Language::C => "cast<double, uint64_t>",
                         },
-                        values_for_pass(64, i, pass),
+                        value_for_array(64, i),
                         match language {
                             &Language::Rust => " as u64",
                             &Language::C => "",
@@ -368,10 +368,9 @@ impl IntrinsicType {
     }
 
     /// Determines the load function for this type.
-    #[allow(unused)]
-    pub fn get_load_function(&self) -> String {
+    pub fn get_load_function(&self, armv7_p64_workaround: bool) -> String {
         match self {
-            IntrinsicType::Ptr { child, .. } => child.get_load_function(),
+            IntrinsicType::Ptr { child, .. } => child.get_load_function(armv7_p64_workaround),
             IntrinsicType::Type {
                 kind: k,
                 bit_len: Some(bl),
@@ -379,7 +378,7 @@ impl IntrinsicType {
                 vec_len,
                 ..
             } => {
-                let quad = if (simd_len.unwrap_or(1) * bl) > 64 {
+                let quad = if simd_len.unwrap_or(1) * bl > 64 {
                     "q"
                 } else {
                     ""
@@ -390,7 +389,8 @@ impl IntrinsicType {
                         TypeKind::UInt => "u",
                         TypeKind::Int => "s",
                         TypeKind::Float => "f",
-                        TypeKind::Poly => "p",
+                        // The ACLE doesn't support 64-bit polynomial loads on Armv7
+                        TypeKind::Poly => if armv7_p64_workaround && *bl == 64 {"s"} else {"p"},
                         x => todo!("get_load_function TypeKind: {:#?}", x),
                     },
                     size = bl,
diff --git a/library/stdarch/crates/intrinsic-test/src/values.rs b/library/stdarch/crates/intrinsic-test/src/values.rs
index 4565edca092..64b4d9fc99f 100644
--- a/library/stdarch/crates/intrinsic-test/src/values.rs
+++ b/library/stdarch/crates/intrinsic-test/src/values.rs
@@ -1,9 +1,8 @@
-/// Gets a hex constant value for a single lane in in a determistic way
+/// Gets a hex constant value for a single value in the argument values array in a determistic way
 /// * `bits`: The number of bits for the type, only 8, 16, 32, 64 are valid values
-/// * `simd`: The index of the simd lane we are generating for
-/// * `pass`: The index of the pass we are generating the values for
-pub fn values_for_pass(bits: u32, simd: u32, pass: usize) -> String {
-    let index = pass + (simd as usize);
+/// * `index`: The position in the array we are generating for
+pub fn value_for_array(bits: u32, index: u32) -> String {
+    let index = index as usize;
 
     if bits == 8 {
         format!("{:#X}", VALUES_8[index % VALUES_8.len()])