-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Issue: #6434 ### Brief Summary 1. This is the third part of adding the amdgpu backend: the runtime part of the implementation. The main runtime code lowers LLVM IR to a GCN ISA object file and then to hsaco (a file format accepted by the module-load API provided by HIP). 2. After calling the relevant API to generate the GCN ISA object, the LLVM linker (ld.lld) needs to be called to produce the hsaco file, so there is a command-line call `ld.lld -shared xxx.o -o xxx.hsaco` in the code, and the temporarily generated files are stored in the `/tmp/taichi_hsaco/` folder. 3. To deal with the problem of multiple `hsaco` files being generated at the same time, random numbers are used to name the generated files, as follows: `JITSessionAMDGPU` has a `random_num_` and a `tmp_dir_`, which are assigned when the `JITSessionAMDGPU` instance is created. Each `ti.kernel` is divided into offload tasks, each of which is compiled into a separate `hsaco` file. A random number bound to the `hsaco` file is obtained when the `hsaco` file is generated. 
Here is an example of the file after running the `ti example mpm128`: ``` taichi_hsaco/ └── 4858208420434830779 ├── taichi_amdgcn_10476395765980093855.hsaco ├── taichi_amdgcn_10476395765980093855.o ├── taichi_amdgcn_11369096326162657620.hsaco ├── taichi_amdgcn_11369096326162657620.o ├── taichi_amdgcn_11700031850871498261.hsaco ├── taichi_amdgcn_11700031850871498261.o ├── taichi_amdgcn_14803499569653867868.hsaco ├── taichi_amdgcn_14803499569653867868.o ├── taichi_amdgcn_14949458395707884954.hsaco ├── taichi_amdgcn_14949458395707884954.o ├── taichi_amdgcn_15955762247261446379.hsaco ├── taichi_amdgcn_15955762247261446379.o ├── taichi_amdgcn_16891452471041191610.hsaco ├── taichi_amdgcn_16891452471041191610.o ├── taichi_amdgcn_17615766226135707772.hsaco ├── taichi_amdgcn_17615766226135707772.o ├── taichi_amdgcn_18033844193337069056.hsaco ├── taichi_amdgcn_18033844193337069056.o ├── taichi_amdgcn_5951151729973841331.hsaco ├── taichi_amdgcn_5951151729973841331.o ├── taichi_amdgcn_6012043323411824926.hsaco ├── taichi_amdgcn_6012043323411824926.o ├── taichi_amdgcn_6796840558965541322.hsaco ├── taichi_amdgcn_6796840558965541322.o ├── taichi_amdgcn_6835984424286808860.hsaco ├── taichi_amdgcn_6835984424286808860.o ├── taichi_amdgcn_7872622170129629907.hsaco ├── taichi_amdgcn_7872622170129629907.o ├── taichi_amdgcn_8760441738982760858.hsaco ├── taichi_amdgcn_8760441738982760858.o ├── taichi_amdgcn_9006625347419529255.hsaco └── taichi_amdgcn_9006625347419529255.o ``` Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
35a0e5b
commit e1165db
Showing
7 changed files
with
380 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# ./taichi/runtime/amdgpu/CMakeLists.txt | ||
|
||
# Declare the AMDGPU runtime library; its sources are attached just below. | ||
add_library(amdgpu_runtime) | ||
target_sources(amdgpu_runtime | ||
PRIVATE | ||
jit_amdgpu.cpp | ||
) | ||
|
||
# Header search paths used only while compiling this target (PRIVATE): | ||
# the project root, the bundled eigen and spdlog trees, and LLVM's headers. | ||
target_include_directories(amdgpu_runtime | ||
PRIVATE | ||
${PROJECT_SOURCE_DIR} | ||
${PROJECT_SOURCE_DIR}/external/eigen | ||
${PROJECT_SOURCE_DIR}/external/spdlog/include | ||
${LLVM_INCLUDE_DIRS} | ||
) | ||
|
||
# Link against the amdgpu_rhi target without re-exporting it to dependents. | ||
target_link_libraries(amdgpu_runtime PRIVATE amdgpu_rhi) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
#include "taichi/runtime/amdgpu/jit_amdgpu.h" | ||
#include "taichi/runtime/llvm/llvm_context.h" | ||
|
||
namespace taichi { | ||
namespace lang { | ||
|
||
#if defined(TI_WITH_AMDGPU) | ||
// Compiles `M` into an hsaco image, hands the image to the AMDGPU driver's
// module_load_data entry point, and stores a JITModuleAMDGPU wrapping the
// resulting handle in `modules`. Returns a non-owning pointer to that module.
// `max_reg` is accepted for interface compatibility and is not read here.
JITModule *JITSessionAMDGPU::add_module(std::unique_ptr<llvm::Module> M,
                                        int max_reg) {
  const std::string hsaco_image = compile_module_to_hsaco(M);
  TI_TRACE("hsaco size: {:.2f}KB", hsaco_image.size() / 1024.0);

  void *loaded_module;
  const auto load_begin = Time::get_time();
  AMDGPUDriver::get_instance().module_load_data(&loaded_module,
                                                hsaco_image.c_str());
  const auto load_elapsed = Time::get_time() - load_begin;
  TI_TRACE("AMDGPU load data from module time : {}ms", load_elapsed * 1000);

  modules.push_back(std::make_unique<JITModuleAMDGPU>(loaded_module));
  return modules.back().get();
}
|
||
std::string JITSessionAMDGPU::compile_module_to_hsaco( | ||
std::unique_ptr<llvm::Module> &llvm_module) { | ||
if (llvm::verifyModule(*llvm_module, &llvm::errs())) { | ||
llvm_module->print(llvm::errs(), nullptr); | ||
TI_WARN("Module broken"); | ||
} | ||
using namespace llvm; | ||
|
||
if (this->config_->print_kernel_llvm_ir) { | ||
static FileSequenceWriter writer("taichi_kernel_amdgpu_llvm_ir_{:04d}.ll", | ||
"unoptimized LLVM IR (AMDGPU)"); | ||
writer.write(llvm_module.get()); | ||
} | ||
auto triple_str = llvm_module->getTargetTriple(); | ||
std::string error_str; | ||
auto target = llvm::TargetRegistry::lookupTarget(triple_str, error_str); | ||
llvm::TargetOptions options; | ||
std::unique_ptr<llvm::TargetMachine> machine(target->createTargetMachine( | ||
triple_str, AMDGPUContext::get_instance().get_mcpu(), "", options, | ||
llvm::Reloc::PIC_, llvm::CodeModel::Small, llvm::CodeGenOpt::Aggressive)); | ||
|
||
llvm_module->setDataLayout(machine->createDataLayout()); | ||
|
||
llvm::legacy::FunctionPassManager function_pass_manager(llvm_module.get()); | ||
llvm::legacy::PassManager module_pass_manager; | ||
|
||
module_pass_manager.add(llvm::createTargetTransformInfoWrapperPass( | ||
machine->getTargetIRAnalysis())); | ||
function_pass_manager.add(llvm::createTargetTransformInfoWrapperPass( | ||
machine->getTargetIRAnalysis())); | ||
|
||
llvm::PassManagerBuilder builder; | ||
builder.OptLevel = 3; | ||
builder.Inliner = | ||
llvm::createFunctionInliningPass(builder.OptLevel, 0, false); | ||
machine->adjustPassManager(builder); | ||
builder.populateFunctionPassManager(function_pass_manager); | ||
builder.populateModulePassManager(module_pass_manager); | ||
|
||
machine->Options.MCOptions.AsmVerbose = true; | ||
|
||
auto tmp_dir = get_tmp_dir(); | ||
uint64 random_num = get_random_num(); | ||
|
||
auto obj_filename = "taichi_amdgcn_" + std::to_string(random_num) + ".o"; | ||
auto hsaco_filename = | ||
"taichi_amdgcn_" + std::to_string(random_num) + ".hsaco"; | ||
auto obj_path = tmp_dir + obj_filename; | ||
auto hsaco_path = tmp_dir + hsaco_filename; | ||
std::error_code ec; | ||
|
||
llvm::SmallString<0> outstr; | ||
llvm::raw_svector_ostream llvm_stream(outstr); | ||
|
||
machine->addPassesToEmitFile(module_pass_manager, llvm_stream, nullptr, | ||
llvm::CGFT_ObjectFile, true); | ||
function_pass_manager.doInitialization(); | ||
for (auto func = llvm_module->begin(); func != llvm_module->end(); ++func) | ||
function_pass_manager.run(*func); | ||
function_pass_manager.doFinalization(); | ||
module_pass_manager.run(*llvm_module); | ||
|
||
std::string obj_str(outstr.begin(), outstr.end()); | ||
std::ofstream(obj_path) << obj_str; | ||
|
||
TI_TRACE("Loading module..."); | ||
[[maybe_unused]] auto _ = AMDGPUContext::get_instance().get_lock_guard(); | ||
|
||
std::string lld_cmd = "ld.lld -shared " + obj_path + " -o " + hsaco_path; | ||
if (std::system(lld_cmd.c_str())) | ||
TI_ERROR(fmt::format("Generate {} Error", hsaco_filename)); | ||
|
||
std::string hsaco_str = load_hsaco(hsaco_path); | ||
|
||
if (this->config_->print_kernel_llvm_ir_optimized) { | ||
static FileSequenceWriter writer( | ||
"taichi_kernel_amdgpu_llvm_ir_optimized_{:04d}.ll", | ||
"unoptimized LLVM IR (AMDGPU)"); | ||
writer.write(llvm_module.get()); | ||
} | ||
return hsaco_str; | ||
} | ||
|
||
// Factory for the AMDGPU JIT session (TI_WITH_AMDGPU builds). Asserts that
// `arch` is Arch::amdgpu and constructs a JITSessionAMDGPU with the fixed
// data-layout specification spelled out below.
std::unique_ptr<JITSession> create_llvm_jit_session_amdgpu(
    TaichiLLVMContext *tlctx,
    CompileConfig *config,
    Arch arch) {
  TI_ASSERT(arch == Arch::amdgpu);

  const char *const layout_spec =
      "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-"
      "v16:16-v24:32-"
      "v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-"
      "n32:64-S32-A5-G1-ni:7";
  llvm::DataLayout amdgpu_layout(layout_spec);

  return std::make_unique<JITSessionAMDGPU>(tlctx, config, amdgpu_layout);
}
#else | ||
// Fallback definition used when TI_WITH_AMDGPU is not defined: keeps the | ||
// symbol available but ignores all arguments and expands TI_NOT_IMPLEMENTED. | ||
// NOTE(review): presumably the macro raises a "not implemented" error and | ||
// never returns -- confirm against its definition. | ||
std::unique_ptr<JITSession> create_llvm_jit_session_amdgpu( | ||
TaichiLLVMContext *tlctx, | ||
CompileConfig *config, | ||
Arch arch) { | ||
TI_NOT_IMPLEMENTED | ||
} | ||
#endif | ||
|
||
} // namespace lang | ||
} // namespace taichi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
#include <memory> | ||
#include <utility> | ||
#include <mutex> | ||
#include <random> | ||
#include <unistd.h> | ||
|
||
#include "llvm/ADT/StringRef.h" | ||
#include "llvm/Support/DynamicLibrary.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "llvm/Target/TargetMachine.h" | ||
#include "llvm/IR/Module.h" | ||
#include "llvm/IR/DataLayout.h" | ||
#include "llvm/IR/LLVMContext.h" | ||
#include "llvm/IR/LegacyPassManager.h" | ||
#include "llvm/IR/Verifier.h" | ||
#include "llvm/Transforms/InstCombine/InstCombine.h" | ||
#include "llvm/Transforms/Scalar.h" | ||
#include "llvm/Transforms/Scalar/GVN.h" | ||
#include "llvm/Transforms/IPO.h" | ||
#include "llvm/Transforms/IPO/PassManagerBuilder.h" | ||
#include "llvm/Analysis/TargetTransformInfo.h" | ||
#include "llvm/MC/TargetRegistry.h" | ||
#include "llvm/Target/TargetMachine.h" | ||
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" | ||
|
||
#include "taichi/rhi/amdgpu/amdgpu_context.h" | ||
#include "taichi/rhi/amdgpu/amdgpu_driver.h" | ||
#include "taichi/jit/jit_session.h" | ||
#include "taichi/util/lang_util.h" | ||
#include "taichi/program/program.h" | ||
#include "taichi/system/timer.h" | ||
#include "taichi/util/file_sequence_writer.h" | ||
#include "taichi/util/io.h" | ||
|
||
#define TI_RUNTIME_HOST | ||
#include "taichi/program/context.h" | ||
#undef TI_RUNTIME_HOST | ||
|
||
namespace taichi { | ||
namespace lang { | ||
|
||
#if defined(TI_WITH_AMDGPU) | ||
|
||
class JITModuleAMDGPU : public JITModule { | ||
private: | ||
void *module_; | ||
|
||
public: | ||
explicit JITModuleAMDGPU(void *module) : module_(module) { | ||
} | ||
|
||
void *lookup_function(const std::string &name) override { | ||
AMDGPUContext::get_instance().make_current(); | ||
void *func = nullptr; | ||
auto t = Time::get_time(); | ||
auto err = | ||
AMDGPUDriver::get_instance().module_get_function.call_with_warning( | ||
&func, module_, name.c_str()); | ||
if (err) { | ||
TI_ERROR("Cannot look up function {}", name); | ||
} | ||
t = Time::get_time() - t; | ||
TI_TRACE("AMDGPU module_get_function {} costs {} ms", name, t * 1000); | ||
TI_ASSERT(func != nullptr); | ||
return func; | ||
} | ||
|
||
void call(const std::string &name, | ||
const std::vector<void *> &arg_pointers, | ||
const std::vector<int> &arg_sizes) override { | ||
launch(name, 1, 1, 0, arg_pointers, arg_sizes); | ||
} | ||
|
||
void launch(const std::string &name, | ||
std::size_t grid_dim, | ||
std::size_t block_dim, | ||
std::size_t dynamic_shared_mem_bytes, | ||
const std::vector<void *> &arg_pointers, | ||
const std::vector<int> &arg_sizes) override { | ||
auto func = lookup_function(name); | ||
AMDGPUContext::get_instance().launch(func, name, arg_pointers, arg_sizes, | ||
grid_dim, block_dim, | ||
dynamic_shared_mem_bytes); | ||
} | ||
|
||
bool direct_dispatch() const override { | ||
return false; | ||
} | ||
}; | ||
|
||
class JITSessionAMDGPU : public JITSession { | ||
public: | ||
llvm::DataLayout data_layout; | ||
|
||
JITSessionAMDGPU(TaichiLLVMContext *tlctx, | ||
CompileConfig *config, | ||
llvm::DataLayout data_layout) | ||
: JITSession(tlctx, config), data_layout(data_layout) { | ||
random_num_ = get_random_num(); | ||
char *env_dir = std::getenv("TI_TMP_DIR"); | ||
tmp_dir_ = "/tmp/taichi_hsaco/"; | ||
if (env_dir) { | ||
tmp_dir_ = env_dir; | ||
if (tmp_dir_[tmp_dir_.size() - 1] != '/') { | ||
tmp_dir_ += '/'; | ||
} | ||
} | ||
tmp_dir_ += std::to_string(random_num_) + "/"; | ||
create_directories(tmp_dir_); | ||
} | ||
|
||
JITModule *add_module(std::unique_ptr<llvm::Module> M, int max_reg) override; | ||
|
||
llvm::DataLayout get_data_layout() override { | ||
return data_layout; | ||
} | ||
|
||
std::string load_hsaco(const std::string &filename) { | ||
std::ifstream src_file(filename); | ||
if (!src_file.is_open()) { | ||
TI_ERROR(fmt::format("Open {} Error", filename)); | ||
} | ||
return std::string(std::istreambuf_iterator<char>(src_file), | ||
(std::istreambuf_iterator<char>())); | ||
} | ||
|
||
uint64 get_random_num() { | ||
// Note: ROCm is available only on Linux OS. | ||
static std::random_device device("/dev/urandom"); | ||
static std::mt19937_64 *rng = new std::mt19937_64(device()); | ||
return (*rng)(); | ||
} | ||
|
||
std::string get_tmp_dir() { | ||
return tmp_dir_; | ||
} | ||
|
||
private: | ||
std::string compile_module_to_hsaco(std::unique_ptr<llvm::Module> &module); | ||
uint64_t random_num_; | ||
std::string tmp_dir_; | ||
}; | ||
|
||
#endif | ||
|
||
// Creates the JIT session for the AMDGPU backend. Declared outside the | ||
// TI_WITH_AMDGPU guard, so the symbol is visible in every build. | ||
std::unique_ptr<JITSession> create_llvm_jit_session_amdgpu( | ||
TaichiLLVMContext *tlctx, | ||
CompileConfig *config, | ||
Arch arch); | ||
|
||
} // namespace lang | ||
} // namespace taichi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.