Rollup merge of #142097 - ZuseZ4:offload-host1, r=oli-obk

gpu offload host code generation r? ghost This will generate most of the host side code to use llvm's offload feature. The first PR will only handle automatic mem-transfers to and from the device. So if a user calls a kernel, we will copy inputs back and forth, but we won't do the actual kernel launch. Before merging, we will use LLVM's Info infrastructure to verify that the memcopies match what openmp offloa generates in C++. `LIBOMPTARGET_INFO=-1 ./my_rust_binary` should print that a memcpy to and later from the device is happening. A follow-up PR will generate the actual device-side kernel which will then do computations on the GPU. A third PR will implement manual host2device and device2host functionality, but the goal is to minimize cases where a user has to overwrite our default handling due to performance issues. I'm trying to get a full MVP out first, so this just recognizes GPU functions based on magic names. The final frontend will obviously move this over to use proper macros, like I'm already doing it for the autodiff work. This work will also be compatible with std::autodiff, so one can differentiate GPU kernels. Tracking: - https://github.com/rust-lang/rust/issues/131513
author: 许杰友 Jieyou Xu (Joe) <39484203+jieyouxu@users.noreply.github.com> 2025-07-22 00:54:24 +0800
committer: GitHub <noreply@github.com> 2025-07-22 00:54:24 +0800
commit: 5e3eb2512591df0cef52404f0ea4202f58935a54 (patch)
tree: 69324c15901f5c3341342f35ba348de15b24bea5 /compiler/rustc_codegen_llvm/src/builder.rs
parent: 3f9f20f71dd945fe7d044e274094a53c90788269 (diff)
parent: c068599173f8670caeeda252f115cc28445f7df0 (diff)
download: rust-5e3eb2512591df0cef52404f0ea4202f58935a54.tar.gz
rust-5e3eb2512591df0cef52404f0ea4202f58935a54.zip
1 files changed, 69 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 514923ad6f3..0ade9edb0d2 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -3,6 +3,7 @@ use std::ops::Deref;
 use std::{iter, ptr};
 
 pub(crate) mod autodiff;
+pub(crate) mod gpu_offload;
 
 use libc::{c_char, c_uint, size_t};
 use rustc_abi as abi;
@@ -117,6 +118,74 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
         }
         bx
     }
+
+    // The generic builder has less functionality and thus (unlike the other alloca) we can not
+    // easily jump to the beginning of the function to place our allocas there. We trust the user
+    // to manually do that. FIXME(offload): improve the genericCx and add more llvm wrappers to
+    // handle this.
+    pub(crate) fn direct_alloca(&mut self, ty: &'ll Type, align: Align, name: &str) -> &'ll Value {
+        let val = unsafe {
+            let alloca = llvm::LLVMBuildAlloca(self.llbuilder, ty, UNNAMED);
+            llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint);
+            // Cast to default addrspace if necessary
+            llvm::LLVMBuildPointerCast(self.llbuilder, alloca, self.cx.type_ptr(), UNNAMED)
+        };
+        if name != "" {
+            let name = std::ffi::CString::new(name).unwrap();
+            llvm::set_value_name(val, &name.as_bytes());
+        }
+        val
+    }
+
+    pub(crate) fn inbounds_gep(
+        &mut self,
+        ty: &'ll Type,
+        ptr: &'ll Value,
+        indices: &[&'ll Value],
+    ) -> &'ll Value {
+        unsafe {
+            llvm::LLVMBuildGEPWithNoWrapFlags(
+                self.llbuilder,
+                ty,
+                ptr,
+                indices.as_ptr(),
+                indices.len() as c_uint,
+                UNNAMED,
+                GEPNoWrapFlags::InBounds,
+            )
+        }
+    }
+
+    pub(crate) fn store(&mut self, val: &'ll Value, ptr: &'ll Value, align: Align) -> &'ll Value {
+        debug!("Store {:?} -> {:?}", val, ptr);
+        assert_eq!(self.cx.type_kind(self.cx.val_ty(ptr)), TypeKind::Pointer);
+        unsafe {
+            let store = llvm::LLVMBuildStore(self.llbuilder, val, ptr);
+            llvm::LLVMSetAlignment(store, align.bytes() as c_uint);
+            store
+        }
+    }
+
+    pub(crate) fn load(&mut self, ty: &'ll Type, ptr: &'ll Value, align: Align) -> &'ll Value {
+        unsafe {
+            let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED);
+            llvm::LLVMSetAlignment(load, align.bytes() as c_uint);
+            load
+        }
+    }
+
+    fn memset(&mut self, ptr: &'ll Value, fill_byte: &'ll Value, size: &'ll Value, align: Align) {
+        unsafe {
+            llvm::LLVMRustBuildMemSet(
+                self.llbuilder,
+                ptr,
+                align.bytes() as c_uint,
+                fill_byte,
+                size,
+                false,
+            );
+        }
+    }
 }
 
 /// Empty string, to be used where LLVM expects an instruction name, indicating
author	许杰友 Jieyou Xu (Joe) <39484203+jieyouxu@users.noreply.github.com>	2025-07-22 00:54:24 +0800
committer	GitHub <noreply@github.com>	2025-07-22 00:54:24 +0800
commit	5e3eb2512591df0cef52404f0ea4202f58935a54 (patch)
tree	69324c15901f5c3341342f35ba348de15b24bea5 /compiler/rustc_codegen_llvm/src/builder.rs
parent	3f9f20f71dd945fe7d044e274094a53c90788269 (diff)
parent	c068599173f8670caeeda252f115cc28445f7df0 (diff)
download	rust-5e3eb2512591df0cef52404f0ea4202f58935a54.tar.gz rust-5e3eb2512591df0cef52404f0ea4202f58935a54.zip