about summary refs log tree commit diff
diff options
context:
space:
mode:
author Guillaume Gomez <guillaume1.gomez@gmail.com> 2022-05-03 23:00:09 +0200
committer Guillaume Gomez <guillaume1.gomez@gmail.com> 2022-05-03 23:00:25 +0200
commit 6e1bf49273a2011aa3fb26e641e410ca42d924af (patch)
tree d7539c823cb45250b006716b534a386140217268
parent 618ba484e9e66dece784753947e2ddec202d282a (diff)
download rust-6e1bf49273a2011aa3fb26e641e410ca42d924af.tar.gz
rust-6e1bf49273a2011aa3fb26e641e410ca42d924af.zip
Give priority to intrinsics translations from llvm
-rw-r--r-- src/intrinsic/archs.rs 36
-rw-r--r-- tools/generate_intrinsics.py 37
2 files changed, 45 insertions, 28 deletions
diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs
index 2782332cc57..bfeb30f2913 100644
--- a/src/intrinsic/archs.rs
+++ b/src/intrinsic/archs.rs
@@ -3021,19 +3021,19 @@ match name {
     "llvm.x86.avx512.cvtss2usi64" => "__builtin_ia32_cvtss2usi64",
     "llvm.x86.avx512.cvttsd2si" => "__builtin_ia32_vcvttsd2si32",
     "llvm.x86.avx512.cvttsd2si64" => "__builtin_ia32_vcvttsd2si64",
-    "llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_cvttsd2usi",
-    // [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_vcvttsd2usi32",
-    "llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_cvttsd2usi64",
-    // [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_vcvttsd2usi64",
+    "llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_vcvttsd2usi32",
+    // [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi" => "__builtin_ia32_cvttsd2usi",
+    "llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_vcvttsd2usi64",
+    // [DUPLICATE]: "llvm.x86.avx512.cvttsd2usi64" => "__builtin_ia32_cvttsd2usi64",
     "llvm.x86.avx512.cvttss2si" => "__builtin_ia32_vcvttss2si32",
     "llvm.x86.avx512.cvttss2si64" => "__builtin_ia32_vcvttss2si64",
-    "llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_cvttss2usi",
-    // [DUPLICATE]: "llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_vcvttss2usi32",
-    "llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_cvttss2usi64",
-    // [DUPLICATE]: "llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_vcvttss2usi64",
+    "llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_vcvttss2usi32",
+    // [DUPLICATE]: "llvm.x86.avx512.cvttss2usi" => "__builtin_ia32_cvttss2usi",
+    "llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_vcvttss2usi64",
+    // [DUPLICATE]: "llvm.x86.avx512.cvttss2usi64" => "__builtin_ia32_cvttss2usi64",
     "llvm.x86.avx512.cvtusi2sd" => "__builtin_ia32_cvtusi2sd",
-    "llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss",
-    // [DUPLICATE]: "llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss32",
+    "llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss32",
+    // [DUPLICATE]: "llvm.x86.avx512.cvtusi2ss" => "__builtin_ia32_cvtusi2ss",
     "llvm.x86.avx512.cvtusi642sd" => "__builtin_ia32_cvtusi2sd64",
     // [DUPLICATE]: "llvm.x86.avx512.cvtusi642sd" => "__builtin_ia32_cvtusi642sd",
     "llvm.x86.avx512.cvtusi642ss" => "__builtin_ia32_cvtusi2ss64",
@@ -3479,10 +3479,10 @@ match name {
     "llvm.x86.avx512.rcp14.ss" => "__builtin_ia32_rcp14ss_mask",
     "llvm.x86.avx512.rcp28.pd" => "__builtin_ia32_rcp28pd_mask",
     "llvm.x86.avx512.rcp28.ps" => "__builtin_ia32_rcp28ps_mask",
-    "llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_mask",
-    // [DUPLICATE]: "llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_round_mask",
-    "llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_mask",
-    // [DUPLICATE]: "llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_round_mask",
+    "llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_round_mask",
+    // [DUPLICATE]: "llvm.x86.avx512.rcp28.sd" => "__builtin_ia32_rcp28sd_mask",
+    "llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_round_mask",
+    // [DUPLICATE]: "llvm.x86.avx512.rcp28.ss" => "__builtin_ia32_rcp28ss_mask",
     "llvm.x86.avx512.rndscale.sd" => "__builtin_ia32_rndscalesd",
     "llvm.x86.avx512.rndscale.ss" => "__builtin_ia32_rndscaless",
     "llvm.x86.avx512.rsqrt14.pd.128" => "__builtin_ia32_rsqrt14pd128_mask",
@@ -3495,10 +3495,10 @@ match name {
     "llvm.x86.avx512.rsqrt14.ss" => "__builtin_ia32_rsqrt14ss_mask",
     "llvm.x86.avx512.rsqrt28.pd" => "__builtin_ia32_rsqrt28pd_mask",
     "llvm.x86.avx512.rsqrt28.ps" => "__builtin_ia32_rsqrt28ps_mask",
-    "llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_mask",
-    // [DUPLICATE]: "llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_round_mask",
-    "llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_mask",
-    // [DUPLICATE]: "llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_round_mask",
+    "llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_round_mask",
+    // [DUPLICATE]: "llvm.x86.avx512.rsqrt28.sd" => "__builtin_ia32_rsqrt28sd_mask",
+    "llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_round_mask",
+    // [DUPLICATE]: "llvm.x86.avx512.rsqrt28.ss" => "__builtin_ia32_rsqrt28ss_mask",
     "llvm.x86.avx512.scatter.dpd.512" => "__builtin_ia32_scattersiv8df",
     "llvm.x86.avx512.scatter.dpi.512" => "__builtin_ia32_scattersiv16si",
     "llvm.x86.avx512.scatter.dpq.512" => "__builtin_ia32_scattersiv8di",
diff --git a/tools/generate_intrinsics.py b/tools/generate_intrinsics.py
index 64f14143381..a1e28c3181c 100644
--- a/tools/generate_intrinsics.py
+++ b/tools/generate_intrinsics.py
@@ -144,12 +144,34 @@ def extract_instrinsics_from_llvmint(llvmint, intrinsics):
         append_translation(json_data, p, intrinsics[arch])
 
 
+def fill_intrinsics(intrinsics, from_intrinsics, all_intrinsics):
+    for arch in from_intrinsics:
+        if arch not in intrinsics:
+            intrinsics[arch] = []
+        for entry in from_intrinsics[arch]:
+            if entry[0] in all_intrinsics:
+                if all_intrinsics[entry[0]] == entry[1]:
+                    # This is a "full" duplicate, both the LLVM instruction and the GCC
+                    # translation are the same.
+                    continue
+                intrinsics[arch].append((entry[0], entry[1], True))
+            else:
+                intrinsics[arch].append((entry[0], entry[1], False))
+                all_intrinsics[entry[0]] = entry[1]
+
+
 def update_intrinsics(llvm_path, llvmint):
-    intrinsics = {}
+    intrinsics_llvm = {}
+    intrinsics_llvmint = {}
     all_intrinsics = {}
 
-    extract_instrinsics_from_llvm(llvm_path, intrinsics)
-    extract_instrinsics_from_llvmint(llvmint, intrinsics)
+    extract_instrinsics_from_llvm(llvm_path, intrinsics_llvm)
+    extract_instrinsics_from_llvmint(llvmint, intrinsics_llvmint)
+
+    intrinsics = {}
+    # We give priority to translations from LLVM over the ones from llvmint.
+    fill_intrinsics(intrinsics, intrinsics_llvm, all_intrinsics)
+    fill_intrinsics(intrinsics, intrinsics_llvmint, all_intrinsics)
 
     archs = [arch for arch in intrinsics]
     archs.sort()
@@ -166,18 +188,13 @@ def update_intrinsics(llvm_path, llvmint):
         for arch in archs:
             if len(intrinsics[arch]) == 0:
                 continue
-            intrinsics[arch].sort()
+            intrinsics[arch].sort(key=lambda x: (x[0], x[2]))
             out.write('    // {}\n'.format(arch))
             for entry in intrinsics[arch]:
-                if entry[0] in all_intrinsics:
-                    if all_intrinsics[entry[0]] == entry[1]:
-                        # This is a "full" duplicate, both the LLVM instruction and the GCC
-                        # translation are the same.
-                        continue
+                if entry[2] == True: # if it is a duplicate
                     out.write('    // [DUPLICATE]: "{}" => "{}",\n'.format(entry[0], entry[1]))
                 else:
                     out.write('    "{}" => "{}",\n'.format(entry[0], entry[1]))
-                    all_intrinsics[entry[0]] = entry[1]
         out.write('    _ => unimplemented!("***** unsupported LLVM intrinsic {}", name),\n')
         out.write("}\n")
     print("Done!")