Skip to content

Commit

Permalink
[amdgpu] Part4 link bitcode file (taichi-dev#7180)
Browse files Browse the repository at this point in the history
Issue: taichi-dev#6434

### Brief Summary
1. Put the AMDGPU-related bitcode files in external/amdgpu_libdevice (so
Taichi only needs libamdhip64.so).
2. Link the AMDGPU-related bitcode files and the Taichi module together (more
precisely, clone the bitcode files into the Taichi module).

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and quadpixels committed May 13, 2023
1 parent e55f30f commit b74daf9
Show file tree
Hide file tree
Showing 45 changed files with 91 additions and 1 deletion.
6 changes: 6 additions & 0 deletions cmake/TaichiCore.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -500,3 +500,9 @@ if (NOT APPLE)
install(FILES ${CMAKE_SOURCE_DIR}/external/cuda_libdevice/slim_libdevice.10.bc
DESTINATION ${INSTALL_LIB_DIR}/runtime)
endif()

# When the AMDGPU backend is enabled, install every bitcode file found in
# external/amdgpu_libdevice into the runtime directory, alongside the CUDA
# libdevice bitcode installed above.
if (TI_WITH_AMDGPU)
# NOTE(review): file(GLOB) is evaluated when CMake configures; .bc files added
# afterwards are presumably only picked up after re-running CMake.
file(GLOB AMDGPU_BC_FILES ${CMAKE_SOURCE_DIR}/external/amdgpu_libdevice/*.bc)
install(FILES ${AMDGPU_BC_FILES}
DESTINATION ${INSTALL_LIB_DIR}/runtime)
endif()
Binary file added external/amdgpu_libdevice/ockl.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_abi_version_400.bc
Binary file not shown.
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_daz_opt_off.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_finite_only_off.bc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_600.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_601.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_602.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_700.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_701.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_702.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_703.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_704.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_705.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_801.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_802.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_803.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_805.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_810.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_900.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_902.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_904.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_906.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_908.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_909.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_90a.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_isa_version_90c.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/oclc_unsafe_math_off.bc
Binary file not shown.
Binary file not shown.
Binary file added external/amdgpu_libdevice/ocml.bc
Binary file not shown.
Binary file added external/amdgpu_libdevice/opencl.bc
Binary file not shown.
82 changes: 82 additions & 0 deletions taichi/runtime/llvm/llvm_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@
#include "taichi/rhi/cuda/cuda_context.h"
#endif

#if defined(TI_WITH_AMDGPU)
#include "taichi/rhi/amdgpu/amdgpu_context.h"
#endif

namespace taichi::lang {

using namespace llvm;
Expand Down Expand Up @@ -486,6 +490,32 @@ std::unique_ptr<llvm::Module> TaichiLLVMContext::module_from_file(
// runtime_module->print(llvm::errs(), nullptr);
}

#ifdef TI_WITH_AMDGPU
  // Replaces the body of the runtime function |name| ("block_dim" or
  // "grid_dim") with a call to the matching __ockl_* function, passing |lhs|
  // as its only argument and truncating the result to i32. Any other |name|
  // is reported through TI_ERROR. If either the runtime function or the
  // __ockl_* function is missing from the module, nothing is changed.
  auto patch_amdgpu_kernel_dim = [&](std::string name, llvm::Value *lhs) {
    std::string ockl_name;
    if (name == "block_dim") {
      ockl_name = "__ockl_get_local_size";
    } else if (name == "grid_dim") {
      ockl_name = "__ockl_get_num_groups";
    } else {
      TI_ERROR("Unknown patch function name");
    }

    auto *stub = module->getFunction(name);
    auto *ockl_func = module->getFunction(ockl_name);
    const bool both_present = stub && ockl_func;
    if (!both_present) {
      return;
    }

    // Drop the existing implementation and rebuild it as a single block.
    stub->deleteBody();
    auto *entry_block = llvm::BasicBlock::Create(*ctx, "entry", stub);
    // Constructing the builder on the block positions it at the block's end.
    IRBuilder<> builder(entry_block);
    auto *dim_call =
        builder.CreateCall(ockl_func->getFunctionType(), ockl_func, {lhs});
    // The patched function returns i32, so narrow the call result.
    auto *dim_i32 =
        builder.CreateTrunc(dim_call, llvm::Type::getInt32Ty(*ctx));
    builder.CreateRet(dim_i32);
    TaichiLLVMContext::mark_inline(stub);
  };
#endif

if (arch_ == Arch::amdgpu) {
module->setTargetTriple("amdgcn-amd-amdhsa");
#ifdef TI_WITH_AMDGPU
Expand All @@ -498,6 +528,12 @@ std::unique_ptr<llvm::Module> TaichiLLVMContext::module_from_file(
function_pass_manager.doFinalization();
patch_intrinsic("thread_idx", llvm::Intrinsic::amdgcn_workitem_id_x);
patch_intrinsic("block_idx", llvm::Intrinsic::amdgcn_workgroup_id_x);

link_module_with_amdgpu_libdevice(module);
patch_amdgpu_kernel_dim(
"block_dim", llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0));
patch_amdgpu_kernel_dim(
"grid_dim", llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0));
#endif
}
}
Expand Down Expand Up @@ -537,6 +573,52 @@ void TaichiLLVMContext::link_module_with_cuda_libdevice(
}
}

// Links the AMDGPU libdevice bitcode files into |module|.
//
// Asserts that the context's arch is Arch::amdgpu. When built with
// TI_WITH_AMDGPU, each bitcode file listed below is loaded from
// runtime_lib_dir() and linked into |module| with llvm::Linker::linkModules;
// the data layout of ocml.bc is copied onto |module| before that file is
// linked. Raises TI_ERROR if linking any file fails. Without TI_WITH_AMDGPU
// the function does nothing beyond the assertion.
void TaichiLLVMContext::link_module_with_amdgpu_libdevice(
    std::unique_ptr<llvm::Module> &module) {
  TI_ASSERT(arch_ == Arch::amdgpu);
#if defined(TI_WITH_AMDGPU)
  // The ISA suffix is taken from the mcpu string after its first three
  // characters (presumably the "gfx" prefix, e.g. "gfx90a" -> "90a").
  const auto isa_version =
      AMDGPUContext::get_instance().get_mcpu().substr(3, 4);
  const std::string libdevice_files[] = {
      "ocml.bc",
      "oclc_wavefrontsize64_off.bc",
      "ockl.bc",
      "oclc_abi_version_400.bc",
      "oclc_correctly_rounded_sqrt_off.bc",
      "oclc_daz_opt_off.bc",
      "oclc_finite_only_off.bc",
      "oclc_isa_version_" + isa_version + ".bc",
      "oclc_unsafe_math_off.bc",
      "opencl.bc"};

  // All libdevice files live in the same directory, so build the prefix once.
  const std::string lib_dir = runtime_lib_dir() + "/";

  for (const auto &libdevice : libdevice_files) {
    auto libdevice_module = module_from_bitcode_file(lib_dir + libdevice,
                                                     get_this_thread_context());

    if (libdevice == "ocml.bc")
      module->setDataLayout(libdevice_module->getDataLayout());

    // Functions not already present in |module| whose lowercased name starts
    // with "__" + <file name> get common linkage before linking.
    // NOTE(review): the prefix includes the ".bc" suffix (e.g. "__ocml.bc"),
    // so this looks unlikely to match any function -- confirm the intent.
    for (auto &f : libdevice_module->functions()) {
      auto func_ = module->getFunction(f.getName());
      if (!func_ && starts_with(f.getName().lower(), "__" + libdevice))
        f.setLinkage(llvm::Function::CommonLinkage);
    }

    // linkModules returns true on error.
    const bool failed =
        llvm::Linker::linkModules(*module, std::move(libdevice_module));
    if (failed) {
      TI_ERROR("AMDGPU libdevice linking failure.");
    }
  }
#endif
}

void TaichiLLVMContext::add_struct_module(std::unique_ptr<Module> module,
int tree_id) {
TI_AUTO_PROF;
Expand Down
2 changes: 2 additions & 0 deletions taichi/runtime/llvm/llvm_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ class TaichiLLVMContext {

void link_module_with_cuda_libdevice(std::unique_ptr<llvm::Module> &module);

// Links the AMDGPU libdevice bitcode files (ocml.bc, ockl.bc, opencl.bc and
// the oclc_* option/ISA files) from the runtime library directory into
// |module|. Asserts that the arch is Arch::amdgpu and raises TI_ERROR if
// linking fails; only the assertion runs when built without TI_WITH_AMDGPU.
void link_module_with_amdgpu_libdevice(std::unique_ptr<llvm::Module> &module);

static int num_instructions(llvm::Function *func);

void insert_nvvm_annotation(llvm::Function *func, std::string key, int val);
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/backends/amdgpu_device_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ TEST(AMDGPU, ConvertFuncParamAddressSpacePass) {
}
}

TEST(AMDGPU, ConvertProgramAndLaunch) {
TEST(AMDGPU, CompileProgramAndLaunch) {
std::string program =
"target datalayout = "
"\"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:"
Expand Down

0 comments on commit b74daf9

Please sign in to comment.