forked from taichi-dev/taichi
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[amdgpu] Part0 add render hardware interface (taichi-dev#6464)
Issue: taichi-dev#6434 ### Brief Summary This commit contains four parts (`driver`, `context`, `device` and `caching_allocator`). The code is similar to `cuda/rhi`; however, there are still some differences between `amdgpu/rhi` and `cuda/rhi`. #### context 1. The method of obtaining the hardware version 2. `Context::launch` #### driver 1. ROCm/HIP internal functions #### cmake The current CMake build system is sufficient to support the unit test in taichi-dev#6597. Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
78a7973
commit 2a60142
Showing
15 changed files
with
921 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# ./taichi/rhi/amdgpu/CMakeLists.txt | ||
|
||
# Name of the AMDGPU render-hardware-interface library target.
set(AMDGPU_RHI amdgpu_rhi)

# The library is built from the device, caching allocator, context and driver
# translation units that live next to this file.
add_library(${AMDGPU_RHI}
  amdgpu_device.cpp
  amdgpu_caching_allocator.cpp
  amdgpu_context.cpp
  amdgpu_driver.cpp
)

# Headers are resolved relative to the project root, plus the bundled Eigen
# and spdlog trees and the LLVM include directories.
target_include_directories(${AMDGPU_RHI}
  PRIVATE
    ${PROJECT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/external/eigen
    ${PROJECT_SOURCE_DIR}/external/spdlog/include
    ${LLVM_INCLUDE_DIRS}
)

target_link_libraries(${AMDGPU_RHI} PRIVATE interop_rhi)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#include "taichi/rhi/amdgpu/amdgpu_caching_allocator.h" | ||
|
||
namespace taichi { | ||
namespace lang { | ||
namespace amdgpu { | ||
|
||
AmdgpuCachingAllocator::AmdgpuCachingAllocator(LlvmDevice *device) | ||
: device_(device) { | ||
} | ||
|
||
uint64_t *AmdgpuCachingAllocator::allocate( | ||
const LlvmDevice::LlvmRuntimeAllocParams ¶ms) { | ||
uint64_t *ret{nullptr}; | ||
auto size_aligned = taichi::iroundup(params.size, taichi_page_size); | ||
auto it_blk = mem_blocks_.lower_bound(size_aligned); | ||
|
||
if (it_blk != mem_blocks_.end()) { | ||
size_t remaining_sz = it_blk->first - size_aligned; | ||
if (remaining_sz > 0) { | ||
TI_ASSERT(remaining_sz % taichi_page_size == 0); | ||
auto remaining_head = | ||
reinterpret_cast<uint8_t *>(it_blk->second) + size_aligned; | ||
mem_blocks_.insert( | ||
{remaining_sz, reinterpret_cast<uint64_t *>(remaining_head)}); | ||
} | ||
ret = it_blk->second; | ||
mem_blocks_.erase(it_blk); | ||
} else { | ||
ret = device_->allocate_llvm_runtime_memory_jit(params); | ||
} | ||
return ret; | ||
} | ||
|
||
// Returns the block starting at `ptr` to the cache, keyed by `sz`, so that a
// later allocate() call can hand it out again. Nothing is freed here.
void AmdgpuCachingAllocator::release(size_t sz, uint64_t *ptr) {
  mem_blocks_.emplace(sz, ptr);
}
|
||
} // namespace amdgpu | ||
} // namespace lang | ||
} // namespace taichi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#pragma once | ||
|
||
#include "taichi/common/core.h" | ||
#include "taichi/math/arithmetic.h" | ||
#include "taichi/rhi/llvm/llvm_device.h" | ||
#include "taichi/inc/constants.h" | ||
#include <stdint.h> | ||
#include <map> | ||
|
||
namespace taichi { | ||
namespace lang { | ||
namespace amdgpu { | ||
|
||
class AmdgpuCachingAllocator { | ||
public: | ||
AmdgpuCachingAllocator(LlvmDevice *device); | ||
|
||
uint64_t *allocate(const LlvmDevice::LlvmRuntimeAllocParams ¶ms); | ||
void release(size_t sz, uint64_t *ptr); | ||
|
||
private: | ||
std::multimap<size_t, uint64_t *> mem_blocks_; | ||
LlvmDevice *device_{nullptr}; | ||
}; | ||
|
||
} // namespace amdgpu | ||
} // namespace lang | ||
} // namespace taichi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#define TI_RUNTIME_HOST | ||
#include "amdgpu_context.h" | ||
|
||
#include <unordered_map> | ||
#include <mutex> | ||
|
||
#include "taichi/util/lang_util.h" | ||
#include "taichi/program/program.h" | ||
#include "taichi/system/threading.h" | ||
#include "taichi/rhi/amdgpu/amdgpu_driver.h" | ||
#include "taichi/analysis/offline_cache_util.h" | ||
|
||
namespace taichi { | ||
namespace lang { | ||
|
||
// Initializes the driver, selects device 0, creates a context on it and
// derives the LLVM target CPU string ("gfx<arch>") from the device
// properties.
//
// Every scalar member is given a defined value up front: `debug_` is read by
// launch() and must not be indeterminate, and the handle members are passed
// to the driver as out-parameters.
AMDGPUContext::AMDGPUContext()
    : device_(nullptr),
      context_(nullptr),
      dev_count_(0),
      compute_capability_(0),
      driver_(AMDGPUDriver::get_instance_without_context()),
      debug_(false) {
  driver_.init(0);
  driver_.device_get_count(&dev_count_);
  driver_.device_get(&device_, 0);

  // Reuse the public accessor instead of duplicating the name buffer here.
  const std::string name = get_device_name();
  TI_TRACE("Using AMDGPU device [id=0]: {}", name);

  driver_.context_create(&context_, 0, device_);

  constexpr double GB = 1024.0 * 1024.0 * 1024.0;
  TI_TRACE("Total memory {:.2f} GB; free memory {:.2f} GB",
           get_total_memory() / GB, get_free_memory() / GB);

  // The device property struct is treated as an opaque buffer of
  // HIP_DEVICE_PROPERTIES_STRUCT_SIZE bytes; the GCN architecture number is
  // read from it as the int at index HIP_DEVICE_GCN_ARCH.
  void *hip_device_prop = std::malloc(HIP_DEVICE_PROPERTIES_STRUCT_SIZE);
  driver_.device_get_prop(hip_device_prop, device_);
  compute_capability_ = static_cast<int *>(hip_device_prop)[HIP_DEVICE_GCN_ARCH];
  std::free(hip_device_prop);

  mcpu_ = fmt::format("gfx{}", compute_capability_);

  TI_TRACE("Emitting AMDGPU code for {}", mcpu_);
}
|
||
// Queries the driver's memory info and returns the value written to its
// second out-parameter (the total amount); the first one is discarded.
std::size_t AMDGPUContext::get_total_memory() {
  std::size_t free_bytes, total_bytes;
  driver_.mem_get_info(&free_bytes, &total_bytes);
  return total_bytes;
}
|
||
// Queries the driver's memory info and returns the value written to its
// first out-parameter (the free amount); the second one is discarded.
std::size_t AMDGPUContext::get_free_memory() {
  std::size_t free_bytes, total_bytes;
  driver_.mem_get_info(&free_bytes, &total_bytes);
  return free_bytes;
}
|
||
// Asks the driver for the name of the selected device and returns it as a
// std::string. The driver writes into a fixed 128-byte stack buffer.
std::string AMDGPUContext::get_device_name() {
  constexpr uint32_t kNameCapacity = 128;
  char buffer[kNameCapacity];
  driver_.device_get_name(buffer, kNameCapacity, device_);
  return std::string(buffer);
}
|
||
void AMDGPUContext::launch(void *func, | ||
const std::string &task_name, | ||
void *arg_pointers, | ||
unsigned grid_dim, | ||
unsigned block_dim, | ||
std::size_t dynamic_shared_mem_bytes, | ||
int arg_bytes) { | ||
if (grid_dim > 0) { | ||
std::lock_guard<std::mutex> _(lock_); | ||
void *config[] = {(void *)0x01, const_cast<void *>(arg_pointers), | ||
(void *)0x02, &arg_bytes, (void *)0x03}; | ||
driver_.launch_kernel(func, grid_dim, 1, 1, block_dim, 1, 1, | ||
dynamic_shared_mem_bytes, nullptr, nullptr, | ||
reinterpret_cast<void **>(&config)); | ||
} | ||
if (debug_) { | ||
driver_.stream_synchronize(nullptr); | ||
} | ||
} | ||
|
||
// Nothing is released explicitly; in particular the driver context created
// by the constructor is not torn down here.
AMDGPUContext::~AMDGPUContext() = default;
|
||
// Returns the process-wide context, constructing it on first use. The object
// is heap-allocated and this function never deletes it.
AMDGPUContext &AMDGPUContext::get_instance() {
  static AMDGPUContext *const instance = new AMDGPUContext();
  return *instance;
}
|
||
} // namespace lang | ||
} // namespace taichi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#pragma once | ||
|
||
#include <mutex> | ||
#include <unordered_map> | ||
#include <thread> | ||
|
||
#include "taichi/program/kernel_profiler.h" | ||
#include "taichi/rhi/amdgpu/amdgpu_driver.h" | ||
|
||
namespace taichi { | ||
namespace lang { | ||
|
||
class AMDGPUDriver; | ||
|
||
class AMDGPUContext { | ||
private: | ||
void *device_; | ||
void *context_; | ||
int dev_count_; | ||
int compute_capability_; | ||
std::string mcpu_; | ||
std::mutex lock_; | ||
AMDGPUDriver &driver_; | ||
bool debug_; | ||
|
||
public: | ||
AMDGPUContext(); | ||
|
||
std::size_t get_total_memory(); | ||
std::size_t get_free_memory(); | ||
std::string get_device_name(); | ||
|
||
bool detected() const { | ||
return dev_count_ != 0; | ||
} | ||
|
||
void launch(void *func, | ||
const std::string &task_name, | ||
void *arg_pointers, | ||
unsigned grid_dim, | ||
unsigned block_dim, | ||
std::size_t dynamic_shared_mem_bytes, | ||
int arg_bytes); | ||
|
||
void set_debug(bool debug) { | ||
debug_ = debug; | ||
} | ||
|
||
std::string get_mcpu() const { | ||
return mcpu_; | ||
} | ||
|
||
void *get_context() { | ||
return context_; | ||
} | ||
|
||
void make_current() { | ||
driver_.context_set_current(context_); | ||
} | ||
|
||
int get_compute_capability() const { | ||
return compute_capability_; | ||
} | ||
|
||
~AMDGPUContext(); | ||
|
||
class ContextGuard { | ||
private: | ||
void *old_ctx_; | ||
void *new_ctx_; | ||
|
||
public: | ||
ContextGuard(AMDGPUContext *new_ctx) | ||
: old_ctx_(nullptr), new_ctx_(new_ctx) { | ||
AMDGPUDriver::get_instance().context_get_current(&old_ctx_); | ||
if (old_ctx_ != new_ctx) | ||
new_ctx->make_current(); | ||
} | ||
|
||
~ContextGuard() { | ||
if (old_ctx_ != new_ctx_) { | ||
AMDGPUDriver::get_instance().context_set_current(old_ctx_); | ||
} | ||
} | ||
}; | ||
|
||
ContextGuard get_guard() { | ||
return ContextGuard(this); | ||
} | ||
|
||
std::unique_lock<std::mutex> get_lock_guard() { | ||
return std::unique_lock<std::mutex>(lock_); | ||
} | ||
|
||
static AMDGPUContext &get_instance(); | ||
}; | ||
|
||
} // namespace lang | ||
} // namespace taichi |
Oops, something went wrong.