Skip to content

Commit

Permalink
[New offload driver][sycl-post-link] Move sycl-post-link target speci…
Browse files Browse the repository at this point in the history
…fic options generation to linker wrapper (intel#14101)

Some sycl-post-link options depend on the target triple, some depend on
user options, and some depend on both.
In this change, we set up all the sycl-post-link options in the driver
and then pass them to the clang-linker-wrapper tool. Before calling
sycl-post-link functionality, the clang-linker-wrapper will update these
options based on the target triple of the image being processed.

Several tests have been updated because the ordering of options has changed.

Thanks

---------

Signed-off-by: Arvind Sudarsanam <[email protected]>
  • Loading branch information
asudarsa authored Jun 11, 2024
1 parent a4c3019 commit c2cdfcc
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 56 deletions.
71 changes: 36 additions & 35 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10620,10 +10620,10 @@ static void addArgs(ArgStringList &DstArgs, const llvm::opt::ArgList &Alloc,
}
}

static void getOtherSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA,
const llvm::opt::ArgList &TCArgs,
ArgStringList &PostLinkArgs,
bool SpecConsts, types::ID OutputType) {
static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
const JobAction &JA,
const llvm::opt::ArgList &TCArgs,
ArgStringList &PostLinkArgs) {
// See if device code splitting is requested
if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) {
auto CodeSplitValue = StringRef(A->getValue());
Expand All @@ -10636,19 +10636,8 @@ static void getOtherSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA,
else { // Device code split is off
}
}
if (OutputType == types::TY_LLVM_BC) {
// single file output requested - this means only perform necessary IR
// transformations (like specialization constant intrinsic lowering) and
// output LLVMIR
addArgs(PostLinkArgs, TCArgs, {"-ir-output-only"});
}
addArgs(PostLinkArgs, TCArgs,
{StringRef(getSYCLPostLinkOptimizationLevel(TCArgs))});
// specialization constants processing is mandatory
if (SpecConsts)
addArgs(PostLinkArgs, TCArgs, {"-spec-const=native"});
else
addArgs(PostLinkArgs, TCArgs, {"-spec-const=emulation"});

// Process device-globals.
addArgs(PostLinkArgs, TCArgs, {"-device-globals"});
Expand All @@ -10665,31 +10654,47 @@ getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA,
const llvm::opt::ArgList &TCArgs,
llvm::Triple Triple, ArgStringList &PostLinkArgs,
bool SpecConsts, types::ID OutputType) {
bool NewOffloadDriver = TC.getDriver().getUseNewOffloadingDriver();
// Note: Do not use Triple when NewOffloadDriver is 'true'.
if (!NewOffloadDriver && (OutputType == types::TY_LLVM_BC)) {
// single file output requested - this means only perform necessary IR
// transformations (like specialization constant intrinsic lowering) and
// output LLVMIR
addArgs(PostLinkArgs, TCArgs, {"-ir-output-only"});
}
// specialization constants processing is mandatory
if (SpecConsts)
addArgs(PostLinkArgs, TCArgs, {"-spec-const=native"});
else
addArgs(PostLinkArgs, TCArgs, {"-spec-const=emulation"});

// See if device code splitting is requested. The logic here works alongside
// the behavior in getNonTripleBasedSYCLPostLinkOpts, where the option is added
// based on the user setting of -fsycl-device-code-split.
if (!(TCArgs.hasArg(options::OPT_fsycl_device_code_split_EQ) ||
Triple.getArchName() == "spir64_fpga"))
if (!TCArgs.hasArg(options::OPT_fsycl_device_code_split_EQ) &&
(NewOffloadDriver || !(Triple.getArchName() == "spir64_fpga")))
addArgs(PostLinkArgs, TCArgs, {"-split=auto"});

// On Intel targets we don't need non-kernel functions as entry points,
// because it only increases amount of code for device compiler to handle,
// without any actual benefits.
// TODO: Try to extend this feature for non-Intel GPUs.
if (!TCArgs.hasFlag(options::OPT_fno_sycl_remove_unused_external_funcs,
options::OPT_fsycl_remove_unused_external_funcs, false) &&
!Triple.isNVPTX() && !Triple.isAMDGPU() && !isSYCLNativeCPU(TC))
if ((!TCArgs.hasFlag(options::OPT_fno_sycl_remove_unused_external_funcs,
options::OPT_fsycl_remove_unused_external_funcs,
false) &&
!isSYCLNativeCPU(TC)) &&
(NewOffloadDriver || (!Triple.isNVPTX() && !Triple.isAMDGPU())))
addArgs(PostLinkArgs, TCArgs, {"-emit-only-kernels-as-entry-points"});

if (!(Triple.isAMDGCN()))
if (!NewOffloadDriver && !Triple.isAMDGCN())
addArgs(PostLinkArgs, TCArgs, {"-emit-param-info"});
// Enable PI program metadata
if (Triple.isNVPTX() || Triple.isAMDGCN() || isSYCLNativeCPU(TC))
// Enable program metadata
if ((!NewOffloadDriver && (Triple.isNVPTX() || Triple.isAMDGCN())) ||
isSYCLNativeCPU(TC))
addArgs(PostLinkArgs, TCArgs, {"-emit-program-metadata"});
if (OutputType != types::TY_LLVM_BC) {
assert(OutputType == types::TY_Tempfiletable);
bool SplitEsimdByDefault = Triple.isSPIROrSPIRV();
bool SplitEsimdByDefault = !NewOffloadDriver && Triple.isSPIROrSPIRV();
bool SplitEsimd = TCArgs.hasFlag(
options::OPT_fsycl_device_code_split_esimd,
options::OPT_fno_sycl_device_code_split_esimd, SplitEsimdByDefault);
Expand All @@ -10701,14 +10706,14 @@ getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA,
addArgs(PostLinkArgs, TCArgs, {"-split-esimd"});
addArgs(PostLinkArgs, TCArgs, {"-lower-esimd"});
}
bool isAOT = Triple.isNVPTX() || Triple.isAMDGCN() ||
bool IsAOT = Triple.isNVPTX() || Triple.isAMDGCN() ||
Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga ||
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen ||
Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64;
if (TCArgs.hasFlag(options::OPT_fsycl_add_default_spec_consts_image,
options::OPT_fno_sycl_add_default_spec_consts_image,
false) &&
isAOT)
(IsAOT || NewOffloadDriver))
addArgs(PostLinkArgs, TCArgs,
{"-generate-device-image-default-spec-consts"});
}
Expand All @@ -10731,9 +10736,7 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
ArgStringList CmdArgs;

llvm::Triple T = getToolChain().getTriple();
getOtherSYCLPostLinkOpts(getToolChain(), JA, TCArgs, CmdArgs,
SYCLPostLink->getRTSetsSpecConstants(),
SYCLPostLink->getTrueType());
getNonTripleBasedSYCLPostLinkOpts(getToolChain(), JA, TCArgs, CmdArgs);
getTripleBasedSYCLPostLinkOpts(getToolChain(), JA, TCArgs, T, CmdArgs,
SYCLPostLink->getRTSetsSpecConstants(),
SYCLPostLink->getTrueType());
Expand Down Expand Up @@ -11135,13 +11138,11 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
types::ID OutputType = TargetTriple.isSPIROrSPIRV() || IsSYCLNativeCPU
? types::TY_Tempfiletable
: types::TY_LLVM_BC;
// TODO: Items like native_cpu and Specialization Constants behaviors are
// dependent on each toolchain. Passing these along as 'general settings'
// for the clang-linker-wrapper causes for potential inconsistencies and
// would need to handled more at the device linking level.
bool SpecConsts = TargetTriple.isSPIROrSPIRV();
getOtherSYCLPostLinkOpts(getToolChain(), JA, Args, PostLinkArgs, SpecConsts,
OutputType);
getNonTripleBasedSYCLPostLinkOpts(getToolChain(), JA, Args, PostLinkArgs);
// Some options like -spec-const=* depend on target triple as well as some
// user options. So, these options are partly computed here and then
// updated inside the clang-linker-wrapper.
getTripleBasedSYCLPostLinkOpts(getToolChain(), JA, Args, TargetTriple,
PostLinkArgs, SpecConsts, OutputType);
for (const auto &A : PostLinkArgs)
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/linker-wrapper-sycl-win.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// CHK-CMDS: "{{.*}}spirv-to-ir-wrapper.exe" {{.*}} -o [[FIRSTLLVMLINKIN:.*]].bc --llvm-spirv-opts=--spirv-preserve-auxdata --llvm-spirv-opts=--spirv-target-env=SPV-IR --llvm-spirv-opts=--spirv-builtin-format=global
// CHK-CMDS-NEXT: "{{.*}}llvm-link.exe" [[FIRSTLLVMLINKIN:.*]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings
// CHK-CMDS-NEXT: "{{.*}}llvm-link.exe" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings
// CHK-CMDS-NEXT: "{{.*}}sycl-post-link.exe" SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
// CHK-CMDS-NEXT: "{{.*}}sycl-post-link.exe" SYCL_POST_LINK_OPTIONS{{.*}} -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
// LLVM-SPIRV is not called in dry-run
// CHK-CMDS-NEXT: offload-wrapper: input: [[LLVMSPIRVOUT:.*]].table, output: [[WRAPPEROUT:.*]].bc
// CHK-CMDS-NEXT: "{{.*}}llc.exe" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/linker-wrapper-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// CHK-CMDS: "{{.*}}spirv-to-ir-wrapper" {{.*}} -o [[FIRSTLLVMLINKIN:.*]].bc --llvm-spirv-opts=--spirv-preserve-auxdata --llvm-spirv-opts=--spirv-target-env=SPV-IR --llvm-spirv-opts=--spirv-builtin-format=global
// CHK-CMDS-NEXT: "{{.*}}llvm-link" [[FIRSTLLVMLINKIN:.*]].bc -o [[FIRSTLLVMLINKOUT:.*]].bc --suppress-warnings
// CHK-CMDS-NEXT: "{{.*}}llvm-link" -only-needed [[FIRSTLLVMLINKOUT]].bc {{.*}}.bc {{.*}}.bc -o [[SECONDLLVMLINKOUT:.*]].bc --suppress-warnings
// CHK-CMDS-NEXT: "{{.*}}sycl-post-link" SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
// CHK-CMDS-NEXT: "{{.*}}sycl-post-link" SYCL_POST_LINK_OPTIONS {{.*}} -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
// LLVM-SPIRV is not called in dry-run
// CHK-CMDS-NEXT: offload-wrapper: input: [[LLVMSPIRVOUT:.*]].table, output: [[WRAPPEROUT:.*]].bc
// CHK-CMDS-NEXT: "{{.*}}llc" -filetype=obj -o [[LLCOUT:.*]].o [[WRAPPEROUT]].bc
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Driver/sycl-intelfpga-aoco-win.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
// CHK-FPGA-AOCO: spirv-to-ir-wrapper{{.*}} "[[LIBLIST]]" "-o" "[[LIBLIST2:.+\.txt]]"
// CHK-FPGA-AOCO: llvm-link{{.*}} "-o" "[[LINKEDBC:.+\.bc]]"
// CHK-FPGA-AOCO: llvm-link{{.*}} "--only-needed" "[[LINKEDBC]]" "@[[LIBLIST2]]" "-o" "[[LINKEDBC2:.+\.bc]]"
// CHK-FPGA-AOCO: sycl-post-link{{.*}} "-spec-const=emulation" "-device-globals"{{.*}} "-o" "[[SPLTABLE:.+\.table]]" "[[LINKEDBC2]]"
// CHK-FPGA-AOCO: sycl-post-link{{.*}} "-device-globals"{{.*}} "-spec-const=emulation"{{.*}} "-o" "[[SPLTABLE:.+\.table]]" "[[LINKEDBC2]]"
// CHK-FPGA-AOCO: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[SPLTABLE]]"
// CHK-FPGA-AOCO: llvm-spirv{{.*}} "-o" "[[TARGSPV:.+\.txt]]" {{.*}} "[[TABLEOUT]]"
// CHK-FPGA-AOCO: clang-offload-bundler{{.*}} "-type=aoo" "-targets=sycl-fpga_aoco-intel-unknown" "-input=[[INPUTLIB]]" "-output=[[AOCOLIST:.+\.txt]]" "-unbundle"
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/sycl-intelfpga-aoco.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
// CHK-FPGA-AOCO: spirv-to-ir-wrapper{{.*}} "[[LIBLIST]]" "-o" "[[LIBLIST2:.+\.txt]]"
// CHK-FPGA-AOCO: llvm-link{{.*}} "-o" "[[LINKEDBC:.+\.bc]]"
// CHK-FPGA-AOCO: llvm-link{{.*}} "--only-needed" "[[LINKEDBC]]" "@[[LIBLIST2]]" "-o" "[[LINKEDBC2:.+\.bc]]"
// CHK-FPGA-AOCO: sycl-post-link{{.*}} "-spec-const=emulation" "-device-globals"{{.*}} "-o" "[[SPLTABLE:.+\.table]]" "[[LINKEDBC2]]"
// CHK-FPGA-AOCO: sycl-post-link{{.*}} "-device-globals"{{.*}} "-spec-const=emulation"{{.*}} "-o" "[[SPLTABLE:.+\.table]]" "[[LINKEDBC2]]"
// CHK-FPGA-AOCO: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[SPLTABLE]]"
// CHK-FPGA-AOCO: llvm-spirv{{.*}} "-o" "[[TARGSPV:.+\.txt]]" {{.*}} "[[TABLEOUT]]"
// CHK-FPGA-AOCO: clang-offload-bundler{{.*}} "-type=aoo" "-targets=sycl-fpga_aoco-intel-unknown" "-input=[[INPUTLIB]]" "-output=[[AOCOLIST:.+\.txt]]" "-unbundle"
Expand Down Expand Up @@ -105,7 +105,7 @@
// CHK-FPGA-AOCO-EMU: clang-offload-bundler{{.*}} "-type=aoo" "-targets=sycl-spir64_fpga-unknown-unknown" "-input=[[INPUTLIB:.+\.a]]" "-output=[[OUTLIB:.+\.txt]]" "-unbundle"
// CHK-FPGA-AOCO-EMU: llvm-foreach{{.*}} "--out-ext=txt" "--in-file-list=[[OUTLIB]]" "--in-replace=[[OUTLIB]]" "--out-file-list=[[DEVICELIST:.+\.txt]]" "--out-replace=[[DEVICELIST]]" "--" {{.*}}spirv-to-ir-wrapper{{.*}} "[[OUTLIB]]" "-o" "[[DEVICELIST]]"
// CHK-FPGA-AOCO-EMU: llvm-link{{.*}} "@[[DEVICELIST]]" "-o" "[[LINKEDBC:.+\.bc]]"
// CHK-FPGA-AOCO-EMU: sycl-post-link{{.*}} "-O2" "-spec-const=emulation" "-device-globals"{{.*}} "-o" "[[SPLTABLE:.+\.table]]" "[[LINKEDBC]]"
// CHK-FPGA-AOCO-EMU: sycl-post-link{{.*}} "-O2" "-device-globals"{{.*}} "-spec-const=emulation"{{.*}} "-o" "[[SPLTABLE:.+\.table]]" "[[LINKEDBC]]"
// CHK-FPGA-AOCO-EMU: file-table-tform{{.*}} "-o" "[[TABLEOUT:.+\.txt]]" "[[SPLTABLE]]"
// CHK-FPGA-AOCO-EMU: llvm-spirv{{.*}} "-o" "[[TARGSPV:.+\.txt]]" {{.*}} "[[TABLEOUT]]"
// CHK-FPGA-AOCO-EMU: opencl-aot{{.*}} "-device=fpga_fast_emu" "-spv=[[TARGSPV]]" "-ir=[[AOCXOUT:.+\.aocx]]"
Expand Down
5 changes: 3 additions & 2 deletions clang/test/Driver/sycl-linker-wrapper-image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ int main() {
// CHECK-DAG: @prop_val = internal unnamed_addr constant [8 x i8] zeroinitializer
// CHECK-DAG: @__sycl_offload_prop_sets_arr.2 = internal constant [1 x %_pi_device_binary_property_struct] [%_pi_device_binary_property_struct { ptr @prop.1, ptr @prop_val, i32 2, i64 8 }]
// CHECK-DAG: @SYCL_PropSetName.3 = internal unnamed_addr constant [25 x i8] c"SYCL/device requirements\00"
// CHECK-DAG: @__sycl_offload_prop_sets_arr.4 = internal constant [2 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr inbounds ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 1, i64 0) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr inbounds ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 1, i64 0) }]
// CHECK-DAG: @SYCL_PropSetName.4 = internal unnamed_addr constant [22 x i8] c"SYCL/kernel param opt\00"
// CHECK-DAG: @__sycl_offload_prop_sets_arr.5 = internal constant [3 x %_pi_device_binary_property_set_struct] [%_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName, ptr @__sycl_offload_prop_sets_arr, ptr getelementptr inbounds ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr, i64 1, i64 0) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.3, ptr @__sycl_offload_prop_sets_arr.2, ptr getelementptr inbounds ([1 x %_pi_device_binary_property_struct], ptr @__sycl_offload_prop_sets_arr.2, i64 1, i64 0) }, %_pi_device_binary_property_set_struct { ptr @SYCL_PropSetName.4, ptr null, ptr null }]
// CHECK-DAG: @.sycl_offloading.0.data = internal unnamed_addr constant [740 x i8]
// CHECK-DAG: @__sycl_offload_entry_name = internal unnamed_addr constant [25 x i8] c"_ZTSZ4mainE11fake_kernel\00"
// CHECK-DAG: @__sycl_offload_entries_arr = internal constant [1 x %struct.__tgt_offload_entry] [%struct.__tgt_offload_entry { ptr null, ptr @__sycl_offload_entry_name, i64 0, i32 0, i32 0 }]
// CHECK-DAG: @.sycl_offloading.0.info = internal local_unnamed_addr constant [2 x i64] [i64 ptrtoint (ptr @.sycl_offloading.0.data to i64), i64 740], section ".tgtimg", align 16
// CHECK-DAG: @llvm.used = appending global [1 x ptr] [ptr @.sycl_offloading.0.info], section "llvm.metadata"
// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr inbounds ([740 x i8], ptr @.sycl_offloading.0.data, i64 1, i64 0), ptr @__sycl_offload_entries_arr, ptr getelementptr inbounds ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 1, i64 0), ptr @__sycl_offload_prop_sets_arr.4, ptr getelementptr inbounds ([2 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.4, i64 1, i64 0) }]
// CHECK-DAG: @.sycl_offloading.device_images = internal unnamed_addr constant [1 x %__sycl.tgt_device_image] [%__sycl.tgt_device_image { i16 2, i8 4, i8 0, ptr @.sycl_offloading.target.0, ptr @.sycl_offloading.opts.compile.0, ptr @.sycl_offloading.opts.link.0, ptr null, ptr null, ptr @.sycl_offloading.0.data, ptr getelementptr inbounds ([740 x i8], ptr @.sycl_offloading.0.data, i64 1, i64 0), ptr @__sycl_offload_entries_arr, ptr getelementptr inbounds ([1 x %struct.__tgt_offload_entry], ptr @__sycl_offload_entries_arr, i64 1, i64 0), ptr @__sycl_offload_prop_sets_arr.5, ptr getelementptr inbounds ([3 x %_pi_device_binary_property_set_struct], ptr @__sycl_offload_prop_sets_arr.5, i64 1, i64 0) }]
// CHECK-DAG: @.sycl_offloading.descriptor = internal constant %__sycl.tgt_bin_desc { i16 1, i16 1, ptr @.sycl_offloading.device_images, ptr null, ptr null }
// CHECK-DAG: @llvm.global_ctors = {{.*}} { i32 1, ptr @sycl.descriptor_reg, ptr null }]
// CHECK-DAG: @llvm.global_dtors = {{.*}} { i32 1, ptr @sycl.descriptor_unreg, ptr null }]
Expand Down
Loading

0 comments on commit c2cdfcc

Please sign in to comment.