Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions compiler/rustc_codegen_llvm/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,13 @@ pub(crate) fn compile_codegen_unit(
// They are necessary for correct offload execution. We do this here to simplify the
// `offload` intrinsic, avoiding the need for tracking whether it's the first
// intrinsic call or not.
let has_host_offload =
cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_)));
let has_host_offload = cx
.sess()
.opts
.unstable_opts
.offload
.iter()
.any(|o| matches!(o, Offload::Host(_) | Offload::Test));
if has_host_offload && !cx.sess().target.is_like_gpu {
cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx)));
}
Expand Down
5 changes: 3 additions & 2 deletions compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ impl<'ll> OffloadGlobals<'ll> {
let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false);

let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty);
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty);
let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl);
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
let init_ty = cx.type_func(&[], cx.type_void());
let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);

Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ pub enum Offload {
Device,
/// Second step in the offload pipeline, generates the host code to call kernels.
Host(String),
/// Test is similar to Host, but allows testing without a device artifact.
Test,
}

/// The different settings that the `-Z autodiff` flag can have.
Expand Down
10 changes: 9 additions & 1 deletion compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,8 @@ mod desc {
pub(crate) const parse_list_with_polarity: &str =
"a comma-separated list of strings, with elements beginning with + or -";
pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintTAFn`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `PrintModFinal`, `PrintPasses`, `NoPostopt`, `LooseTypes`, `Inline`, `NoTT`";
pub(crate) const parse_offload: &str = "a comma separated list of settings: `Enable`";
pub(crate) const parse_offload: &str =
"a comma separated list of settings: `Host=<Absolute-Path>`, `Device`, `Test`";
pub(crate) const parse_comma_list: &str = "a comma-separated list of strings";
pub(crate) const parse_opt_comma_list: &str = parse_comma_list;
pub(crate) const parse_number: &str = "a number";
Expand Down Expand Up @@ -1472,6 +1473,13 @@ pub mod parse {
}
Offload::Device
}
"Test" => {
if let Some(_) = arg {
// Test does not accept a value
return false;
}
Offload::Test
}
_ => {
// FIXME(ZuseZ4): print an error saying which value is not recognized
return false;
Expand Down
42 changes: 19 additions & 23 deletions tests/codegen-llvm/gpu_offload/gpu_host.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
//@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
//@ no-prefer-dynamic
//@ needs-enzyme
//@ needs-offload

// This test is verifying that we generate __tgt_target_data_*_mapper before and after a call to the
// kernel_1. Better documentation of what each global or variable means is available in the gpu
// offload code, or the LLVM offload documentation. This code does not launch any GPU kernels yet,
// and will be rewritten once a proper offload frontend has landed.
//
// We currently only handle memory transfer for specific calls to functions named `kernel_{num}`,
// when inside of a function called main. This, too, is a temporary workaround for not having a
// frontend.
// offload code, or the LLVM offload documentation.

#![feature(rustc_attrs)]
#![feature(core_intrinsics)]
Expand All @@ -22,6 +17,20 @@ fn main() {
core::hint::black_box(&x);
}

// Wrapper that passes `_kernel_1` and the one-element argument tuple `(x,)` to the
// `core::intrinsics::offload` intrinsic and returns whatever that call yields.
// `no_mangle` keeps the symbol name `kernel_1` unmangled and `inline(never)` prevents the
// function from being inlined into its callers.
// NOTE(review): presumably both attributes exist so the CHECK lines can match stable names
// in the emitted IR — confirm.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn kernel_1(x: &mut [f32; 256]) {
core::intrinsics::offload(_kernel_1, (x,))
}

// Overwrites every one of the 256 elements of `x` with `21.0`.
// `no_mangle` keeps the symbol name `_kernel_1` unmangled and `inline(never)` prevents the
// function from being inlined into its callers.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn _kernel_1(x: &mut [f32; 256]) {
    // Visit each slot by mutable reference rather than by index.
    for slot in x.iter_mut() {
        *slot = 21.0;
    }
}

// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
Expand All @@ -36,8 +45,9 @@ fn main() {
// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
// CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8

// CHECK: Function Attrs: nounwind
// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
// CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr

// CHECK: define{{( dso_local)?}} void @main()
// CHECK-NEXT: start:
Expand Down Expand Up @@ -94,17 +104,3 @@ fn main() {
// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// Wrapper that passes `_kernel_1` and the one-element argument tuple `(x,)` to the
// `core::intrinsics::offload` intrinsic and returns whatever that call yields.
// `no_mangle` keeps the symbol name `kernel_1` unmangled and `inline(never)` prevents the
// function from being inlined into its callers.
// NOTE(review): presumably both attributes exist so the CHECK lines can match stable names
// in the emitted IR — confirm.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn kernel_1(x: &mut [f32; 256]) {
core::intrinsics::offload(_kernel_1, (x,))
}

// Overwrites every one of the 256 elements of `x` with `21.0`.
// `no_mangle` keeps the symbol name `_kernel_1` unmangled and `inline(never)` prevents the
// function from being inlined into its callers.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn _kernel_1(x: &mut [f32; 256]) {
    // Visit each slot by mutable reference rather than by index.
    for slot in x.iter_mut() {
        *slot = 21.0;
    }
}
Loading