diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake
index c30ea1770edfa..f07ae3254dabe 100644
--- a/cmake/TaichiCore.cmake
+++ b/cmake/TaichiCore.cmake
@@ -500,3 +500,12 @@ if (NOT APPLE)
     install(FILES ${CMAKE_SOURCE_DIR}/external/cuda_libdevice/slim_libdevice.10.bc
             DESTINATION ${INSTALL_LIB_DIR}/runtime)
 endif()
+
+if (TI_WITH_AMDGPU)
+    # Install every .bc file under external/amdgpu_libdevice into the same
+    # runtime directory the CUDA libdevice above is installed to.
+    # NOTE: file(GLOB) runs at configure time; re-run CMake after adding files.
+    file(GLOB AMDGPU_BC_FILES ${CMAKE_SOURCE_DIR}/external/amdgpu_libdevice/*.bc)
+    install(FILES ${AMDGPU_BC_FILES}
+            DESTINATION ${INSTALL_LIB_DIR}/runtime)
+endif()
diff --git a/external/amdgpu_libdevice/ockl.bc b/external/amdgpu_libdevice/ockl.bc
new file mode 100644
index 0000000000000..455bd6b05d5cd
Binary files /dev/null and b/external/amdgpu_libdevice/ockl.bc differ
diff --git a/external/amdgpu_libdevice/oclc_abi_version_400.bc b/external/amdgpu_libdevice/oclc_abi_version_400.bc
new file mode 100644
index 0000000000000..f9972bac27610
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_abi_version_400.bc differ
diff --git a/external/amdgpu_libdevice/oclc_correctly_rounded_sqrt_off.bc b/external/amdgpu_libdevice/oclc_correctly_rounded_sqrt_off.bc
new file mode 100644
index 0000000000000..98c8559ed88e1
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_correctly_rounded_sqrt_off.bc differ
diff --git a/external/amdgpu_libdevice/oclc_daz_opt_off.bc b/external/amdgpu_libdevice/oclc_daz_opt_off.bc
new file mode 100644
index 0000000000000..d42da7f94757b
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_daz_opt_off.bc differ
diff --git a/external/amdgpu_libdevice/oclc_finite_only_off.bc b/external/amdgpu_libdevice/oclc_finite_only_off.bc
new file mode 100644
index 0000000000000..93d343d69412c
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_finite_only_off.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1010.bc b/external/amdgpu_libdevice/oclc_isa_version_1010.bc
new file mode 100644
index 0000000000000..2ba76207a5f58
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1010.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1011.bc b/external/amdgpu_libdevice/oclc_isa_version_1011.bc
new file mode 100644
index 0000000000000..40eb1c191e412
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1011.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1012.bc b/external/amdgpu_libdevice/oclc_isa_version_1012.bc
new file mode 100644
index 0000000000000..c0991daf5b89e
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1012.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1013.bc b/external/amdgpu_libdevice/oclc_isa_version_1013.bc
new file mode 100644
index 0000000000000..b854939d9ecfa
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1013.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1030.bc b/external/amdgpu_libdevice/oclc_isa_version_1030.bc
new file mode 100644
index 0000000000000..c86cc9b6f3b78
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1030.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1031.bc b/external/amdgpu_libdevice/oclc_isa_version_1031.bc
new file mode 100644
index 0000000000000..02d4adf24eb1b
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1031.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1032.bc b/external/amdgpu_libdevice/oclc_isa_version_1032.bc
new file mode 100644
index 0000000000000..3dc016e396317
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1032.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1033.bc b/external/amdgpu_libdevice/oclc_isa_version_1033.bc
new file mode 100644
index 0000000000000..227fc07f77479
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1033.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1034.bc b/external/amdgpu_libdevice/oclc_isa_version_1034.bc
new file mode 100644
index 0000000000000..c1097ede929d0
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1034.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_1035.bc b/external/amdgpu_libdevice/oclc_isa_version_1035.bc
new file mode 100644
index 0000000000000..4be025e3048d6
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_1035.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_600.bc b/external/amdgpu_libdevice/oclc_isa_version_600.bc
new file mode 100644
index 0000000000000..e6807e7b1f93b
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_600.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_601.bc b/external/amdgpu_libdevice/oclc_isa_version_601.bc
new file mode 100644
index 0000000000000..e9e6032d1d155
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_601.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_602.bc b/external/amdgpu_libdevice/oclc_isa_version_602.bc
new file mode 100644
index 0000000000000..16a2c1139e0c5
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_602.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_700.bc b/external/amdgpu_libdevice/oclc_isa_version_700.bc
new file mode 100644
index 0000000000000..667d13ccf3cdc
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_700.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_701.bc b/external/amdgpu_libdevice/oclc_isa_version_701.bc
new file mode 100644
index 0000000000000..b839a2b459760
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_701.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_702.bc b/external/amdgpu_libdevice/oclc_isa_version_702.bc
new file mode 100644
index 0000000000000..ef26c938848fc
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_702.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_703.bc b/external/amdgpu_libdevice/oclc_isa_version_703.bc
new file mode 100644
index 0000000000000..3535c52c30e0f
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_703.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_704.bc b/external/amdgpu_libdevice/oclc_isa_version_704.bc
new file mode 100644
index 0000000000000..111bff2b33422
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_704.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_705.bc b/external/amdgpu_libdevice/oclc_isa_version_705.bc
new file mode 100644
index 0000000000000..50987d08eb402
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_705.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_801.bc b/external/amdgpu_libdevice/oclc_isa_version_801.bc
new file mode 100644
index 0000000000000..b12cbb79faac6
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_801.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_802.bc b/external/amdgpu_libdevice/oclc_isa_version_802.bc
new file mode 100644
index 0000000000000..7033213adad3b
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_802.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_803.bc b/external/amdgpu_libdevice/oclc_isa_version_803.bc
new file mode 100644
index 0000000000000..886db17a7a82d
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_803.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_805.bc b/external/amdgpu_libdevice/oclc_isa_version_805.bc
new file mode 100644
index 0000000000000..d4d112bc6bd24
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_805.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_810.bc b/external/amdgpu_libdevice/oclc_isa_version_810.bc
new file mode 100644
index 0000000000000..d9439b47ed1de
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_810.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_900.bc b/external/amdgpu_libdevice/oclc_isa_version_900.bc
new file mode 100644
index 0000000000000..b71dc501f1d01
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_900.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_902.bc b/external/amdgpu_libdevice/oclc_isa_version_902.bc
new file mode 100644
index 0000000000000..dd5ee25ddad38
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_902.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_904.bc b/external/amdgpu_libdevice/oclc_isa_version_904.bc
new file mode 100644
index 0000000000000..17a735e8702b5
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_904.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_906.bc b/external/amdgpu_libdevice/oclc_isa_version_906.bc
new file mode 100644
index 0000000000000..d33593a30fb66
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_906.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_908.bc b/external/amdgpu_libdevice/oclc_isa_version_908.bc
new file mode 100644
index 0000000000000..cae4353309fb6
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_908.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_909.bc b/external/amdgpu_libdevice/oclc_isa_version_909.bc
new file mode 100644
index 0000000000000..d2c595e094241
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_909.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_90a.bc b/external/amdgpu_libdevice/oclc_isa_version_90a.bc
new file mode 100644
index 0000000000000..8ef94e67a1514
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_90a.bc differ
diff --git a/external/amdgpu_libdevice/oclc_isa_version_90c.bc b/external/amdgpu_libdevice/oclc_isa_version_90c.bc
new file mode 100644
index 0000000000000..edbbc270c6a7e
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_isa_version_90c.bc differ
diff --git a/external/amdgpu_libdevice/oclc_unsafe_math_off.bc b/external/amdgpu_libdevice/oclc_unsafe_math_off.bc
new file mode 100644
index 0000000000000..e6382c8119b93
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_unsafe_math_off.bc differ
diff --git a/external/amdgpu_libdevice/oclc_wavefrontsize64_off.bc b/external/amdgpu_libdevice/oclc_wavefrontsize64_off.bc
new file mode 100644
index 0000000000000..fcab444056b83
Binary files /dev/null and b/external/amdgpu_libdevice/oclc_wavefrontsize64_off.bc differ
diff --git a/external/amdgpu_libdevice/ocml.bc b/external/amdgpu_libdevice/ocml.bc
new file mode 100644
index 0000000000000..4467fbba74c51
Binary files /dev/null and b/external/amdgpu_libdevice/ocml.bc differ
diff --git a/external/amdgpu_libdevice/opencl.bc b/external/amdgpu_libdevice/opencl.bc
new file mode 100644
index 0000000000000..697c378b5f07b
Binary files /dev/null and b/external/amdgpu_libdevice/opencl.bc differ
diff --git a/taichi/runtime/llvm/llvm_context.cpp b/taichi/runtime/llvm/llvm_context.cpp
index 6494bd72994ce..77e6bc1199a6a 100644
--- a/taichi/runtime/llvm/llvm_context.cpp
+++ b/taichi/runtime/llvm/llvm_context.cpp
@@ -61,6 +61,10 @@
 #include "taichi/rhi/cuda/cuda_context.h"
 #endif
 
+#if defined(TI_WITH_AMDGPU)
+#include "taichi/rhi/amdgpu/amdgpu_context.h"
+#endif
+
 namespace taichi::lang {
 
 using namespace llvm;
@@ -486,6 +490,36 @@ std::unique_ptr<llvm::Module> TaichiLLVMContext::module_from_file(
     // runtime_module->print(llvm::errs(), nullptr);
   }
 
+#ifdef TI_WITH_AMDGPU
+  // Replaces the body of the runtime function |name| ("block_dim" or
+  // "grid_dim") with a call to the matching __ockl_* function, passing |lhs|
+  // and truncating the result to i32. Returns without touching |module| when
+  // either function is missing from it.
+  auto patch_amdgpu_kernel_dim = [&](std::string name, llvm::Value *lhs) {
+    std::string actual_name;
+    if (name == "block_dim")
+      actual_name = "__ockl_get_local_size";
+    else if (name == "grid_dim")
+      actual_name = "__ockl_get_num_groups";
+    else
+      TI_ERROR("Unknown patch function name");
+    auto func = module->getFunction(name);
+    auto actual_func = module->getFunction(actual_name);
+    if (!func || !actual_func) {
+      return;
+    }
+    func->deleteBody();
+    auto bb = llvm::BasicBlock::Create(*ctx, "entry", func);
+    IRBuilder<> builder(*ctx);
+    builder.SetInsertPoint(bb);
+    auto dim_ = builder.CreateCall(actual_func->getFunctionType(),
+                                   actual_func, {lhs});
+    auto ret_ = builder.CreateTrunc(dim_, llvm::Type::getInt32Ty(*ctx));
+    builder.CreateRet(ret_);
+    TaichiLLVMContext::mark_inline(func);
+  };
+#endif
+
   if (arch_ == Arch::amdgpu) {
     module->setTargetTriple("amdgcn-amd-amdhsa");
 #ifdef TI_WITH_AMDGPU
@@ -498,6 +532,14 @@ std::unique_ptr<llvm::Module> TaichiLLVMContext::module_from_file(
     function_pass_manager.doFinalization();
     patch_intrinsic("thread_idx", llvm::Intrinsic::amdgcn_workitem_id_x);
     patch_intrinsic("block_idx", llvm::Intrinsic::amdgcn_workgroup_id_x);
+
+    // Link the device libraries before patching: the patch looks up __ockl_*
+    // functions in |module| (presumably from ockl.bc) and skips when absent.
+    link_module_with_amdgpu_libdevice(module);
+    patch_amdgpu_kernel_dim(
+        "block_dim", llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0));
+    patch_amdgpu_kernel_dim(
+        "grid_dim", llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0));
 #endif
   }
 }
@@ -537,6 +579,60 @@ void TaichiLLVMContext::link_module_with_cuda_libdevice(
   }
 }
 
+// Links the AMDGPU device bitcode libraries found in runtime_lib_dir() into
+// |module|. Every file name is fixed except oclc_isa_version_*.bc, which is
+// derived from AMDGPUContext's mcpu string. Reports TI_ERROR as soon as
+// one library fails to link. Without TI_WITH_AMDGPU only the arch assert runs.
+void TaichiLLVMContext::link_module_with_amdgpu_libdevice(
+    std::unique_ptr<llvm::Module> &module) {
+  TI_ASSERT(arch_ == Arch::amdgpu);
+#if defined(TI_WITH_AMDGPU)
+  // Presumably mcpu looks like "gfx1030"; dropping the first three characters
+  // would then leave the ISA version used in the file name -- TODO confirm.
+  auto isa_version = AMDGPUContext::get_instance().get_mcpu().substr(3, 4);
+  std::string libdevice_files[] = {"ocml.bc",
+                                   "oclc_wavefrontsize64_off.bc",
+                                   "ockl.bc",
+                                   "oclc_abi_version_400.bc",
+                                   "oclc_correctly_rounded_sqrt_off.bc",
+                                   "oclc_daz_opt_off.bc",
+                                   "oclc_finite_only_off.bc",
+                                   "oclc_isa_version_" + isa_version + ".bc",
+                                   "oclc_unsafe_math_off.bc",
+                                   "opencl.bc"};
+  // Loop-invariant, so build the directory prefix once.
+  const std::string lib_dir = runtime_lib_dir() + "/";
+
+  for (auto &libdevice : libdevice_files) {
+    auto libdevice_module = module_from_bitcode_file(lib_dir + libdevice,
+                                                     get_this_thread_context());
+
+    // |module| takes over the data layout declared by ocml.bc.
+    if (libdevice == "ocml.bc")
+      module->setDataLayout(libdevice_module->getDataLayout());
+
+    // NOTE(review): |libdevice| still carries its ".bc" suffix here, so the
+    // prefix is e.g. "__ocml.bc" and this match looks like it can never
+    // succeed -- the intended prefix was presumably "__ocml". Behavior is left
+    // unchanged on purpose: the LLVM LangRef does not allow functions to have
+    // common linkage, so the desired linkage must be settled before the
+    // prefix is corrected.
+    const std::string linkage_prefix = "__" + libdevice;
+    for (auto &f : libdevice_module->functions()) {
+      auto func_ = module->getFunction(f.getName());
+      if (!func_ && starts_with(f.getName().lower(), linkage_prefix))
+        f.setLinkage(llvm::Function::CommonLinkage);
+    }
+
+    bool failed =
+        llvm::Linker::linkModules(*module, std::move(libdevice_module));
+    if (failed) {
+      TI_ERROR("AMDGPU libdevice linking failure.");
+    }
+  }
+#endif
+}
+
 void TaichiLLVMContext::add_struct_module(std::unique_ptr<llvm::Module> module,
                                           int tree_id) {
   TI_AUTO_PROF;
diff --git a/taichi/runtime/llvm/llvm_context.h b/taichi/runtime/llvm/llvm_context.h
index ddea66efd763d..866c80e7882e4 100644
--- a/taichi/runtime/llvm/llvm_context.h
+++ b/taichi/runtime/llvm/llvm_context.h
@@ -151,6 +151,8 @@ class TaichiLLVMContext {
 
   void link_module_with_cuda_libdevice(std::unique_ptr<llvm::Module> &module);
 
+  void link_module_with_amdgpu_libdevice(std::unique_ptr<llvm::Module> &module);
+
   static int num_instructions(llvm::Function *func);
 
   void insert_nvvm_annotation(llvm::Function *func, std::string key, int val);
diff --git a/tests/cpp/backends/amdgpu_device_test.cpp b/tests/cpp/backends/amdgpu_device_test.cpp
index a5d3683c2d403..5f6aa6af97f8f 100644
--- a/tests/cpp/backends/amdgpu_device_test.cpp
+++ b/tests/cpp/backends/amdgpu_device_test.cpp
@@ -193,7 +193,7 @@ TEST(AMDGPU, ConvertFuncParamAddressSpacePass) {
   }
 }
 
-TEST(AMDGPU, ConvertProgramAndLaunch) {
+TEST(AMDGPU, CompileProgramAndLaunch) {
   std::string program =
       "target datalayout = "
       "\"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:"