about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2019-02-01 23:43:34 +0000
committerbors <bors@rust-lang.org>2019-02-01 23:43:34 +0000
commit2efa31b2d9bf171fecd294b8e0126d8ffdb453e3 (patch)
treea7be085de514ca23d9b4737697eb4aec45092460
parent852701ad6df90f4e4cdb11d487373f026f38e5b3 (diff)
parent49931fda56dc6268ba3c104b64768f551cfc4636 (diff)
downloadrust-2efa31b2d9bf171fecd294b8e0126d8ffdb453e3.tar.gz
rust-2efa31b2d9bf171fecd294b8e0126d8ffdb453e3.zip
Auto merge of #57937 - denzp:nvptx, r=nagisa
NVPTX target specification

This change adds a built-in `nvptx64-nvidia-cuda` GPGPU no-std target specification and basic PTX assembly smoke tests.

The approach taken here and the target spec are based on `ptx-linker`, a project started about 1.5 years ago. Key feature: bitcode object files are linked with LTO into the final module on the linker's side.

Prior to this change, the linker used the `ld` linker flavor, but I think that having a dedicated CLI convention is more reliable.

Questions about further progress on reliable CUDA workflow with Rust:
1. Is it possible to create a test suite `codegen-asm` to verify end-to-end integration with LLVM backend?
1. What would be the better way to organise no-std `compile-fail` tests: add `#![no_std]` where possible and mark the others with an `ignore-nvptx` directive, or alternatively, introduce a `compile-fail-no-std` test suite?
1. Can the `ptx-linker` eventually be integrated the way `rls` or `clippy` are? Hopefully, this would allow it to statically link against the LLVM used in Rust and get rid of the [current hacky solution](https://github.com/denzp/rustc-llvm-proxy).
1. Am I missing some methods from `rustc_codegen_ssa::back::linker::Linker` that can be useful for bitcode-only linking?

Currently, there are no major public CUDA projects written in Rust that I'm aware of, but I expect that having a built-in target will create a solid foundation for further experiments and awesome crates.

Related to #38789
Fixes #38787
Fixes #38786
-rw-r--r--.travis.yml2
-rw-r--r--src/bootstrap/lib.rs1
-rw-r--r--src/bootstrap/sanity.rs4
-rw-r--r--src/ci/docker/dist-various-2/Dockerfile1
-rw-r--r--src/ci/docker/test-various/Dockerfile (renamed from src/ci/docker/wasm32-unknown/Dockerfile)19
-rw-r--r--src/librustc/ty/context.rs6
-rw-r--r--src/librustc_codegen_ssa/back/link.rs1
-rw-r--r--src/librustc_codegen_ssa/back/linker.rs132
-rw-r--r--src/librustc_codegen_utils/lib.rs1
-rw-r--r--src/librustc_codegen_utils/symbol_names.rs127
-rw-r--r--src/librustc_target/spec/mod.rs4
-rw-r--r--src/librustc_target/spec/nvptx64_nvidia_cuda.rs73
-rw-r--r--src/test/run-make/nvptx-binary-crate/Makefile12
-rw-r--r--src/test/run-make/nvptx-binary-crate/main.rs28
-rw-r--r--src/test/run-make/nvptx-dylib-crate/Makefile10
-rw-r--r--src/test/run-make/nvptx-dylib-crate/dep.rs14
-rw-r--r--src/test/run-make/nvptx-dylib-crate/kernel.rs59
-rw-r--r--src/test/run-make/nvptx-emit-asm/Makefile9
-rw-r--r--src/test/run-make/nvptx-emit-asm/kernel.rs41
19 files changed, 483 insertions, 61 deletions
diff --git a/.travis.yml b/.travis.yml
index c4efa884603..7985b6c0e19 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -168,7 +168,7 @@ matrix:
       if: branch = auto
     - env: IMAGE=i686-gnu-nopt
       if: branch = auto
-    - env: IMAGE=wasm32-unknown
+    - env: IMAGE=test-various
       if: branch = auto
     - env: IMAGE=x86_64-gnu
       if: branch = auto
diff --git a/src/bootstrap/lib.rs b/src/bootstrap/lib.rs
index 32b03c5fb1b..1aa2e116a5a 100644
--- a/src/bootstrap/lib.rs
+++ b/src/bootstrap/lib.rs
@@ -831,6 +831,7 @@ impl Build {
                   !target.contains("msvc") &&
                   !target.contains("emscripten") &&
                   !target.contains("wasm32") &&
+                  !target.contains("nvptx") &&
                   !target.contains("fuchsia") {
             Some(self.cc(target))
         } else {
diff --git a/src/bootstrap/sanity.rs b/src/bootstrap/sanity.rs
index fe547a6b151..ff4fb85bbfa 100644
--- a/src/bootstrap/sanity.rs
+++ b/src/bootstrap/sanity.rs
@@ -156,7 +156,7 @@ pub fn check(build: &mut Build) {
             panic!("the iOS target is only supported on macOS");
         }
 
-        if target.contains("-none-") {
+        if target.contains("-none-") || target.contains("nvptx") {
             if build.no_std(*target).is_none() {
                 let target = build.config.target_config.entry(target.clone())
                     .or_default();
@@ -165,7 +165,7 @@ pub fn check(build: &mut Build) {
             }
 
             if build.no_std(*target) == Some(false) {
-                panic!("All the *-none-* targets are no-std targets")
+                panic!("All the *-none-* and nvptx* targets are no-std targets")
             }
         }
 
diff --git a/src/ci/docker/dist-various-2/Dockerfile b/src/ci/docker/dist-various-2/Dockerfile
index 97892405b8e..e2710a18bda 100644
--- a/src/ci/docker/dist-various-2/Dockerfile
+++ b/src/ci/docker/dist-various-2/Dockerfile
@@ -70,6 +70,7 @@ ENV TARGETS=$TARGETS,x86_64-sun-solaris
 ENV TARGETS=$TARGETS,x86_64-unknown-linux-gnux32
 ENV TARGETS=$TARGETS,x86_64-unknown-cloudabi
 ENV TARGETS=$TARGETS,x86_64-fortanix-unknown-sgx
+ENV TARGETS=$TARGETS,nvptx64-nvidia-cuda
 
 ENV X86_FORTANIX_SGX_LIBS="/x86_64-fortanix-unknown-sgx/lib/"
 
diff --git a/src/ci/docker/wasm32-unknown/Dockerfile b/src/ci/docker/test-various/Dockerfile
index 161f0c0062f..6c419e13c9f 100644
--- a/src/ci/docker/wasm32-unknown/Dockerfile
+++ b/src/ci/docker/test-various/Dockerfile
@@ -13,14 +13,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   gdb \
   xz-utils
 
+# FIXME: build the `ptx-linker` instead.
+RUN curl -sL https://github.com/denzp/rust-ptx-linker/releases/download/v0.9.0-alpha.2/rust-ptx-linker.linux64.tar.gz | \
+  tar -xzvC /usr/bin
+
 RUN curl -sL https://nodejs.org/dist/v9.2.0/node-v9.2.0-linux-x64.tar.xz | \
-    tar -xJ
+  tar -xJ
 
 COPY scripts/sccache.sh /scripts/
 RUN sh /scripts/sccache.sh
 
-ENV TARGETS=wasm32-unknown-unknown
-
 ENV RUST_CONFIGURE_ARGS \
   --set build.nodejs=/node-v9.2.0-linux-x64/bin/node \
   --set rust.lld
@@ -31,11 +33,18 @@ ENV RUST_CONFIGURE_ARGS \
 # other contexts as well
 ENV NO_DEBUG_ASSERTIONS=1
 
-ENV SCRIPT python2.7 /checkout/x.py test --target $TARGETS \
+ENV WASM_TARGETS=wasm32-unknown-unknown
+ENV WASM_SCRIPT python2.7 /checkout/x.py test --target $WASM_TARGETS \
   src/test/run-make \
   src/test/ui \
   src/test/run-pass \
   src/test/compile-fail \
   src/test/mir-opt \
   src/test/codegen-units \
-  src/libcore \
+  src/libcore
+
+ENV NVPTX_TARGETS=nvptx64-nvidia-cuda
+ENV NVPTX_SCRIPT python2.7 /checkout/x.py test --target $NVPTX_TARGETS \
+  src/test/run-make
+
+ENV SCRIPT $WASM_SCRIPT && $NVPTX_SCRIPT
diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs
index 881c0d4e6d2..b379b5ba024 100644
--- a/src/librustc/ty/context.rs
+++ b/src/librustc/ty/context.rs
@@ -1675,6 +1675,12 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
         }
         false
     }
+
+    /// Determine whether identifiers in the assembly have strict naming rules.
+    /// Currently, only NVPTX* targets need it.
+    pub fn has_strict_asm_symbol_naming(&self) -> bool {
+        self.gcx.sess.target.target.arch.contains("nvptx")
+    }
 }
 
 impl<'a, 'tcx> TyCtxt<'a, 'tcx, 'tcx> {
diff --git a/src/librustc_codegen_ssa/back/link.rs b/src/librustc_codegen_ssa/back/link.rs
index d03bb0a3d73..2a5ecf9a059 100644
--- a/src/librustc_codegen_ssa/back/link.rs
+++ b/src/librustc_codegen_ssa/back/link.rs
@@ -149,6 +149,7 @@ pub fn linker_and_flavor(sess: &Session) -> (PathBuf, LinkerFlavor) {
                 LinkerFlavor::Ld => "ld",
                 LinkerFlavor::Msvc => "link.exe",
                 LinkerFlavor::Lld(_) => "lld",
+                LinkerFlavor::PtxLinker => "rust-ptx-linker",
             }), flavor)),
             (Some(linker), None) => {
                 let stem = if linker.extension().and_then(|ext| ext.to_str()) == Some("exe") {
diff --git a/src/librustc_codegen_ssa/back/linker.rs b/src/librustc_codegen_ssa/back/linker.rs
index ad61f8f01d8..249715a7b6e 100644
--- a/src/librustc_codegen_ssa/back/linker.rs
+++ b/src/librustc_codegen_ssa/back/linker.rs
@@ -13,7 +13,7 @@ use rustc::hir::def_id::{LOCAL_CRATE, CrateNum};
 use rustc::middle::dependency_format::Linkage;
 use rustc::session::Session;
 use rustc::session::config::{self, CrateType, OptLevel, DebugInfo,
-                             CrossLangLto};
+                             CrossLangLto, Lto};
 use rustc::ty::TyCtxt;
 use rustc_target::spec::{LinkerFlavor, LldFlavor};
 use serialize::{json, Encoder};
@@ -83,6 +83,10 @@ impl LinkerInfo {
             LinkerFlavor::Lld(LldFlavor::Wasm) => {
                 Box::new(WasmLd::new(cmd, sess, self)) as Box<dyn Linker>
             }
+
+            LinkerFlavor::PtxLinker => {
+                Box::new(PtxLinker { cmd, sess }) as Box<dyn Linker>
+            }
         }
     }
 }
@@ -1080,3 +1084,129 @@ fn exported_symbols(tcx: TyCtxt, crate_type: CrateType) -> Vec<String> {
 
     symbols
 }
+
+/// Much simplified and explicit CLI for the NVPTX linker. The linker operates
+/// with bitcode and uses LLVM backend to generate a PTX assembly.
+pub struct PtxLinker<'a> {
+    cmd: Command,
+    sess: &'a Session,
+}
+
+impl<'a> Linker for PtxLinker<'a> {
+    fn link_rlib(&mut self, path: &Path) {
+        self.cmd.arg("--rlib").arg(path);
+    }
+
+    fn link_whole_rlib(&mut self, path: &Path) {
+        self.cmd.arg("--rlib").arg(path);
+    }
+
+    fn include_path(&mut self, path: &Path) {
+        self.cmd.arg("-L").arg(path);
+    }
+
+    fn debuginfo(&mut self) {
+        self.cmd.arg("--debug");
+    }
+
+    fn add_object(&mut self, path: &Path) {
+        self.cmd.arg("--bitcode").arg(path);
+    }
+
+    fn args(&mut self, args: &[String]) {
+        self.cmd.args(args);
+    }
+
+    fn optimize(&mut self) {
+        match self.sess.lto() {
+            Lto::Thin | Lto::Fat | Lto::ThinLocal => {
+                self.cmd.arg("-Olto");
+            },
+
+            Lto::No => { },
+        };
+    }
+
+    fn output_filename(&mut self, path: &Path) {
+        self.cmd.arg("-o").arg(path);
+    }
+
+    fn finalize(&mut self) -> Command {
+        // Provide the linker with fallback to internal `target-cpu`.
+        self.cmd.arg("--fallback-arch").arg(match self.sess.opts.cg.target_cpu {
+            Some(ref s) => s,
+            None => &self.sess.target.target.options.cpu
+        });
+
+        ::std::mem::replace(&mut self.cmd, Command::new(""))
+    }
+
+    fn link_dylib(&mut self, _lib: &str) {
+        panic!("external dylibs not supported")
+    }
+
+    fn link_rust_dylib(&mut self, _lib: &str, _path: &Path) {
+        panic!("external dylibs not supported")
+    }
+
+    fn link_staticlib(&mut self, _lib: &str) {
+        panic!("staticlibs not supported")
+    }
+
+    fn link_whole_staticlib(&mut self, _lib: &str, _search_path: &[PathBuf]) {
+        panic!("staticlibs not supported")
+    }
+
+    fn framework_path(&mut self, _path: &Path) {
+        panic!("frameworks not supported")
+    }
+
+    fn link_framework(&mut self, _framework: &str) {
+        panic!("frameworks not supported")
+    }
+
+    fn position_independent_executable(&mut self) {
+    }
+
+    fn full_relro(&mut self) {
+    }
+
+    fn partial_relro(&mut self) {
+    }
+
+    fn no_relro(&mut self) {
+    }
+
+    fn build_static_executable(&mut self) {
+    }
+
+    fn gc_sections(&mut self, _keep_metadata: bool) {
+    }
+
+    fn pgo_gen(&mut self) {
+    }
+
+    fn no_default_libraries(&mut self) {
+    }
+
+    fn build_dylib(&mut self, _out_filename: &Path) {
+    }
+
+    fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType) {
+    }
+
+    fn subsystem(&mut self, _subsystem: &str) {
+    }
+
+    fn no_position_independent_executable(&mut self) {
+    }
+
+    fn group_start(&mut self) {
+    }
+
+    fn group_end(&mut self) {
+    }
+
+    fn cross_lang_lto(&mut self) {
+    }
+}
diff --git a/src/librustc_codegen_utils/lib.rs b/src/librustc_codegen_utils/lib.rs
index 1f590d46ed8..8e96f985401 100644
--- a/src/librustc_codegen_utils/lib.rs
+++ b/src/librustc_codegen_utils/lib.rs
@@ -12,6 +12,7 @@
 #![feature(nll)]
 #![allow(unused_attributes)]
 #![feature(rustc_diagnostic_macros)]
+#![feature(in_band_lifetimes)]
 
 #![recursion_limit="256"]
 
diff --git a/src/librustc_codegen_utils/symbol_names.rs b/src/librustc_codegen_utils/symbol_names.rs
index 9267f14f242..3238a0b10bf 100644
--- a/src/librustc_codegen_utils/symbol_names.rs
+++ b/src/librustc_codegen_utils/symbol_names.rs
@@ -221,7 +221,7 @@ fn get_symbol_hash<'a, 'tcx>(
 }
 
 fn def_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> ty::SymbolName {
-    let mut buffer = SymbolPathBuffer::new();
+    let mut buffer = SymbolPathBuffer::new(tcx);
     item_path::with_forced_absolute_paths(|| {
         tcx.push_item_path(&mut buffer, def_id, false);
     });
@@ -317,7 +317,7 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
 
     let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs);
 
-    let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id));
+    let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id), tcx);
 
     if instance.is_vtable_shim() {
         buf.push("{{vtable-shim}}");
@@ -343,22 +343,25 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
 struct SymbolPathBuffer {
     result: String,
     temp_buf: String,
+    strict_naming: bool,
 }
 
 impl SymbolPathBuffer {
-    fn new() -> Self {
+    fn new(tcx: TyCtxt<'_, '_, '_>) -> Self {
         let mut result = SymbolPathBuffer {
             result: String::with_capacity(64),
             temp_buf: String::with_capacity(16),
+            strict_naming: tcx.has_strict_asm_symbol_naming(),
         };
         result.result.push_str("_ZN"); // _Z == Begin name-sequence, N == nested
         result
     }
 
-    fn from_interned(symbol: ty::SymbolName) -> Self {
+    fn from_interned(symbol: ty::SymbolName, tcx: TyCtxt<'_, '_, '_>) -> Self {
         let mut result = SymbolPathBuffer {
             result: String::with_capacity(64),
             temp_buf: String::with_capacity(16),
+            strict_naming: tcx.has_strict_asm_symbol_naming(),
         };
         result.result.push_str(&symbol.as_str());
         result
@@ -375,68 +378,88 @@ impl SymbolPathBuffer {
         let _ = write!(self.result, "17h{:016x}E", hash);
         self.result
     }
-}
 
-impl ItemPathBuffer for SymbolPathBuffer {
-    fn root_mode(&self) -> &RootMode {
-        const ABSOLUTE: &RootMode = &RootMode::Absolute;
-        ABSOLUTE
-    }
-
-    fn push(&mut self, text: &str) {
+    // Name sanitation. LLVM will happily accept identifiers with weird names, but
+    // gas doesn't!
+    // gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
+    // NVPTX assembly has more strict naming rules than gas, so additionally, dots
+    // are replaced with '$' there.
+    fn sanitize_and_append(&mut self, s: &str) {
         self.temp_buf.clear();
-        let need_underscore = sanitize(&mut self.temp_buf, text);
+
+        for c in s.chars() {
+            match c {
+                // Escape these with $ sequences
+                '@' => self.temp_buf.push_str("$SP$"),
+                '*' => self.temp_buf.push_str("$BP$"),
+                '&' => self.temp_buf.push_str("$RF$"),
+                '<' => self.temp_buf.push_str("$LT$"),
+                '>' => self.temp_buf.push_str("$GT$"),
+                '(' => self.temp_buf.push_str("$LP$"),
+                ')' => self.temp_buf.push_str("$RP$"),
+                ',' => self.temp_buf.push_str("$C$"),
+
+                '-' | ':' => if self.strict_naming {
+                    // NVPTX doesn't support these characters in symbol names.
+                    self.temp_buf.push('$')
+                }
+                else {
+                    // '.' doesn't occur in types and functions, so reuse it
+                    // for ':' and '-'
+                    self.temp_buf.push('.')
+                },
+
+                '.' => if self.strict_naming {
+                    self.temp_buf.push('$')
+                }
+                else {
+                    self.temp_buf.push('.')
+                },
+
+                // These are legal symbols
+                'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$' => self.temp_buf.push(c),
+
+                _ => {
+                    self.temp_buf.push('$');
+                    for c in c.escape_unicode().skip(1) {
+                        match c {
+                            '{' => {}
+                            '}' => self.temp_buf.push('$'),
+                            c => self.temp_buf.push(c),
+                        }
+                    }
+                }
+            }
+        }
+
+        let need_underscore = {
+            // Underscore-qualify anything that didn't start as an ident.
+            !self.temp_buf.is_empty()
+                && self.temp_buf.as_bytes()[0] != '_' as u8
+                && !(self.temp_buf.as_bytes()[0] as char).is_xid_start()
+        };
+
         let _ = write!(
             self.result,
             "{}",
             self.temp_buf.len() + (need_underscore as usize)
         );
+
         if need_underscore {
             self.result.push('_');
         }
+
         self.result.push_str(&self.temp_buf);
     }
 }
 
-// Name sanitation. LLVM will happily accept identifiers with weird names, but
-// gas doesn't!
-// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
-//
-// returns true if an underscore must be added at the start
-pub fn sanitize(result: &mut String, s: &str) -> bool {
-    for c in s.chars() {
-        match c {
-            // Escape these with $ sequences
-            '@' => result.push_str("$SP$"),
-            '*' => result.push_str("$BP$"),
-            '&' => result.push_str("$RF$"),
-            '<' => result.push_str("$LT$"),
-            '>' => result.push_str("$GT$"),
-            '(' => result.push_str("$LP$"),
-            ')' => result.push_str("$RP$"),
-            ',' => result.push_str("$C$"),
-
-            // '.' doesn't occur in types and functions, so reuse it
-            // for ':' and '-'
-            '-' | ':' => result.push('.'),
-
-            // These are legal symbols
-            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '$' => result.push(c),
-
-            _ => {
-                result.push('$');
-                for c in c.escape_unicode().skip(1) {
-                    match c {
-                        '{' => {}
-                        '}' => result.push('$'),
-                        c => result.push(c),
-                    }
-                }
-            }
-        }
+impl ItemPathBuffer for SymbolPathBuffer {
+    fn root_mode(&self) -> &RootMode {
+        const ABSOLUTE: &RootMode = &RootMode::Absolute;
+        ABSOLUTE
     }
 
-    // Underscore-qualify anything that didn't start as an ident.
-    !result.is_empty() && result.as_bytes()[0] != '_' as u8
-        && !(result.as_bytes()[0] as char).is_xid_start()
+    fn push(&mut self, text: &str) {
+        self.sanitize_and_append(text);
+    }
 }
diff --git a/src/librustc_target/spec/mod.rs b/src/librustc_target/spec/mod.rs
index e47da3cff95..aeecce49b0c 100644
--- a/src/librustc_target/spec/mod.rs
+++ b/src/librustc_target/spec/mod.rs
@@ -75,6 +75,7 @@ pub enum LinkerFlavor {
     Ld,
     Msvc,
     Lld(LldFlavor),
+    PtxLinker,
 }
 
 #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash,
@@ -143,6 +144,7 @@ flavor_mappings! {
     ((LinkerFlavor::Gcc), "gcc"),
     ((LinkerFlavor::Ld), "ld"),
     ((LinkerFlavor::Msvc), "msvc"),
+    ((LinkerFlavor::PtxLinker), "ptx-linker"),
     ((LinkerFlavor::Lld(LldFlavor::Wasm)), "wasm-ld"),
     ((LinkerFlavor::Lld(LldFlavor::Ld64)), "ld64.lld"),
     ((LinkerFlavor::Lld(LldFlavor::Ld)), "ld.lld"),
@@ -455,6 +457,8 @@ supported_targets! {
     ("x86_64-fortanix-unknown-sgx", x86_64_fortanix_unknown_sgx),
 
     ("x86_64-unknown-uefi", x86_64_unknown_uefi),
+
+    ("nvptx64-nvidia-cuda", nvptx64_nvidia_cuda),
 }
 
 /// Everything `rustc` knows about how to compile for a specific target.
diff --git a/src/librustc_target/spec/nvptx64_nvidia_cuda.rs b/src/librustc_target/spec/nvptx64_nvidia_cuda.rs
new file mode 100644
index 00000000000..e8512415e66
--- /dev/null
+++ b/src/librustc_target/spec/nvptx64_nvidia_cuda.rs
@@ -0,0 +1,73 @@
+use spec::{LinkerFlavor, Target, TargetOptions, TargetResult, PanicStrategy, MergeFunctions};
+use spec::abi::Abi;
+
+pub fn target() -> TargetResult {
+    Ok(Target {
+        arch: "nvptx64".to_string(),
+        data_layout: "e-i64:64-i128:128-v16:16-v32:32-n16:32:64".to_string(),
+        llvm_target: "nvptx64-nvidia-cuda".to_string(),
+
+        target_os: "cuda".to_string(),
+        target_vendor: "nvidia".to_string(),
+        target_env: String::new(),
+
+        linker_flavor: LinkerFlavor::PtxLinker,
+
+        target_endian: "little".to_string(),
+        target_pointer_width: "64".to_string(),
+        target_c_int_width: "32".to_string(),
+
+        options: TargetOptions {
+            // The linker can be installed from `crates.io`.
+            linker: Some("rust-ptx-linker".to_string()),
+
+            // With the `ptx-linker` approach, it can be overridden later via link flags.
+            cpu: "sm_30".to_string(),
+
+            // FIXME: create tests for the atomics.
+            max_atomic_width: Some(64),
+
+            // Unwinding on CUDA is neither feasible nor useful.
+            panic_strategy: PanicStrategy::Abort,
+
+            // Needed to use `dylib` and `bin` crate types and the linker.
+            dynamic_linking: true,
+            executables: true,
+
+            // Avoid using dylib because it contains metadata not supported
+            // by LLVM NVPTX backend.
+            only_cdylib: true,
+
+            // Let the `ptx-linker` handle LLVM lowering into MC / assembly.
+            obj_is_bitcode: true,
+
+            // Convenient and predictable naming scheme.
+            dll_prefix: "".to_string(),
+            dll_suffix: ".ptx".to_string(),
+            exe_suffix: ".ptx".to_string(),
+
+            // Disable MergeFunctions LLVM optimisation pass because it can
+            // produce kernel functions that call other kernel functions.
+            // This behavior is not supported by PTX ISA.
+            merge_functions: MergeFunctions::Disabled,
+
+            // FIXME: enable compilation tests for the target and
+            // create the tests for this.
+            abi_blacklist: vec![
+                Abi::Cdecl,
+                Abi::Stdcall,
+                Abi::Fastcall,
+                Abi::Vectorcall,
+                Abi::Thiscall,
+                Abi::Aapcs,
+                Abi::Win64,
+                Abi::SysV64,
+                Abi::Msp430Interrupt,
+                Abi::X86Interrupt,
+                Abi::AmdGpuKernel,
+            ],
+
+            .. Default::default()
+        },
+    })
+}
diff --git a/src/test/run-make/nvptx-binary-crate/Makefile b/src/test/run-make/nvptx-binary-crate/Makefile
new file mode 100644
index 00000000000..2c211b5c785
--- /dev/null
+++ b/src/test/run-make/nvptx-binary-crate/Makefile
@@ -0,0 +1,12 @@
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+	$(RUSTC) main.rs --crate-type="bin" --target $(TARGET) -O -C link-arg=--arch=sm_60 -o $(TMPDIR)/main.link_arg.ptx
+	$(RUSTC) main.rs --crate-type="bin" --target $(TARGET) -O -C target-cpu=sm_60 -o $(TMPDIR)/main.target_cpu.ptx
+
+	FileCheck main.rs --input-file $(TMPDIR)/main.link_arg.ptx
+	FileCheck main.rs --input-file $(TMPDIR)/main.target_cpu.ptx
+else
+all:
+endif
diff --git a/src/test/run-make/nvptx-binary-crate/main.rs b/src/test/run-make/nvptx-binary-crate/main.rs
new file mode 100644
index 00000000000..826bc3a47bb
--- /dev/null
+++ b/src/test/run-make/nvptx-binary-crate/main.rs
@@ -0,0 +1,28 @@
+#![no_std]
+#![no_main]
+#![deny(warnings)]
+#![feature(abi_ptx, core_intrinsics)]
+
+// Check the overridden CUDA arch.
+// CHECK: .target sm_60
+// CHECK: .address_size 64
+
+// Verify that no extra function declarations are present.
+// CHECK-NOT: .func
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+    // CHECK: add.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, 5;
+    *b = *a + 5;
+}
+
+// Verify that no extra function definitions are there.
+// CHECK-NOT: .func
+// CHECK-NOT: .entry
+
+#[panic_handler]
+unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
+    core::intrinsics::breakpoint();
+    core::hint::unreachable_unchecked();
+}
diff --git a/src/test/run-make/nvptx-dylib-crate/Makefile b/src/test/run-make/nvptx-dylib-crate/Makefile
new file mode 100644
index 00000000000..7284e9d1a7c
--- /dev/null
+++ b/src/test/run-make/nvptx-dylib-crate/Makefile
@@ -0,0 +1,10 @@
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+	$(RUSTC) dep.rs --crate-type="rlib" --target $(TARGET)
+	$(RUSTC) kernel.rs --crate-type="cdylib" -O --target $(TARGET)
+	FileCheck kernel.rs --input-file $(TMPDIR)/kernel.ptx
+else
+all:
+endif
diff --git a/src/test/run-make/nvptx-dylib-crate/dep.rs b/src/test/run-make/nvptx-dylib-crate/dep.rs
new file mode 100644
index 00000000000..57f3ee87cdb
--- /dev/null
+++ b/src/test/run-make/nvptx-dylib-crate/dep.rs
@@ -0,0 +1,14 @@
+#![no_std]
+#![deny(warnings)]
+
+#[inline(never)]
+#[no_mangle]
+pub fn wrapping_external_fn(a: u32) -> u32 {
+    a.wrapping_mul(a)
+}
+
+#[inline(never)]
+#[no_mangle]
+pub fn panicking_external_fn(a: u32) -> u32 {
+    a * a
+}
diff --git a/src/test/run-make/nvptx-dylib-crate/kernel.rs b/src/test/run-make/nvptx-dylib-crate/kernel.rs
new file mode 100644
index 00000000000..63fd6b063dd
--- /dev/null
+++ b/src/test/run-make/nvptx-dylib-crate/kernel.rs
@@ -0,0 +1,59 @@
+#![no_std]
+#![deny(warnings)]
+#![feature(abi_ptx, core_intrinsics)]
+
+extern crate dep;
+
+// Verify the default CUDA arch.
+// CHECK: .target sm_30
+// CHECK: .address_size 64
+
+// Make sure declarations are there.
+// CHECK: .func (.param .b32 func_retval0) wrapping_external_fn
+// CHECK: .func (.param .b32 func_retval0) panicking_external_fn
+// CHECK: .func [[PANIC_HANDLER:_ZN4core9panicking5panic[a-zA-Z0-9]+]]
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+    // CHECK:      call.uni (retval0),
+    // CHECK-NEXT: wrapping_external_fn
+    // CHECK:      ld.param.b32 %[[LHS:r[0-9]+]], [retval0+0];
+    let lhs = dep::wrapping_external_fn(*a);
+
+    // CHECK:      call.uni (retval0),
+    // CHECK-NEXT: panicking_external_fn
+    // CHECK:      ld.param.b32 %[[RHS:r[0-9]+]], [retval0+0];
+    let rhs = dep::panicking_external_fn(*a);
+
+    // CHECK: add.s32 %[[RES:r[0-9]+]], %[[RHS]], %[[LHS]];
+    // CHECK: st.global.u32 [%{{rd[0-9]+}}], %[[RES]];
+    *b = lhs + rhs;
+}
+
+// Verify that external function bodies are available.
+// CHECK-LABEL: .func (.param .b32 func_retval0) wrapping_external_fn
+// CHECK: {
+// CHECK:   st.param.b32 [func_retval0+0], %{{r[0-9]+}};
+// CHECK: }
+
+// Also verify panic behavior.
+// CHECK-LABEL: .func (.param .b32 func_retval0) panicking_external_fn
+// CHECK: {
+// CHECK:   %{{p[0-9]+}} bra [[PANIC_LABEL:[a-zA-Z0-9_]+]];
+// CHECK: [[PANIC_LABEL]]:
+// CHECK:   call.uni
+// CHECK:   [[PANIC_HANDLER]]
+// CHECK: }
+
+// Verify whether our dummy panic formatter has a correct body.
+// CHECK: .func [[PANIC_FMT:_ZN4core9panicking9panic_fmt[a-zA-Z0-9]+]]()
+// CHECK: {
+// CHECK:   trap;
+// CHECK: }
+
+#[panic_handler]
+unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
+    core::intrinsics::breakpoint();
+    core::hint::unreachable_unchecked();
+}
diff --git a/src/test/run-make/nvptx-emit-asm/Makefile b/src/test/run-make/nvptx-emit-asm/Makefile
new file mode 100644
index 00000000000..e03601878bd
--- /dev/null
+++ b/src/test/run-make/nvptx-emit-asm/Makefile
@@ -0,0 +1,9 @@
+-include ../../run-make-fulldeps/tools.mk
+
+ifeq ($(TARGET),nvptx64-nvidia-cuda)
+all:
+	$(RUSTC) kernel.rs --crate-type="rlib" --emit asm,llvm-ir -O --target $(TARGET)
+	FileCheck kernel.rs --input-file $(TMPDIR)/kernel.s
+else
+all:
+endif
diff --git a/src/test/run-make/nvptx-emit-asm/kernel.rs b/src/test/run-make/nvptx-emit-asm/kernel.rs
new file mode 100644
index 00000000000..b71e18d9103
--- /dev/null
+++ b/src/test/run-make/nvptx-emit-asm/kernel.rs
@@ -0,0 +1,41 @@
+#![no_std]
+#![deny(warnings)]
+#![feature(abi_ptx)]
+
+// Verify the default CUDA arch.
+// CHECK: .target sm_30
+// CHECK: .address_size 64
+
+// Verify function name doesn't contain unacceptable characters.
+// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN:_ZN[a-zA-Z0-9$_]+square[a-zA-Z0-9$_]+]]
+
+// CHECK-LABEL: .visible .entry top_kernel(
+#[no_mangle]
+pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
+    // CHECK:      call.uni (retval0),
+    // CHECK-NEXT: [[IMPL_FN]]
+    *b = deep::private::MyStruct::new(*a).square();
+}
+
+pub mod deep {
+    pub mod private {
+        pub struct MyStruct<T>(T);
+
+        impl MyStruct<u32> {
+            pub fn new(a: u32) -> Self {
+                MyStruct(a)
+            }
+
+            #[inline(never)]
+            pub fn square(&self) -> u32 {
+                self.0.wrapping_mul(self.0)
+            }
+        }
+    }
+}
+
+// Verify that external function bodies are available.
+// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN]]
+// CHECK: {
+// CHECK:   mul.lo.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, %{{r[0-9]+}}
+// CHECK: }