Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[amdgpu] Part0 add render hardware interface and cmake #6975

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ jobs:
. .github/workflows/scripts/common-utils.sh

ci-docker-run-amdgpu --name taichi-build \
registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.3 \
registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.4 \
/home/dev/taichi/.github/workflows/scripts/build.py

env:
Expand All @@ -310,7 +310,7 @@ jobs:
. .github/workflows/scripts/common-utils.sh

ci-docker-run-amdgpu --name taichi-test \
registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.3 \
registry.taichigraphics.com/taichidev-ubuntu18.04.amdgpu:v0.0.4 \
/home/dev/taichi/.github/workflows/scripts/unix_test.sh
env:
PY: '3.8'
Expand Down
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,10 @@ if (TI_WITH_CUDA)
set(CUDA_ARCH "cuda")
endif()

if (TI_WITH_AMDGPU)
set(AMDGPU_ARCH "amdgpu")
endif()

if (TI_WITH_DX12)
set(DX12_ARCH "dx12")
endif()
Expand Down
55 changes: 36 additions & 19 deletions cmake/TaichiCore.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ option(TI_WITH_LLVM "Build with LLVM backends" ON)
option(TI_WITH_METAL "Build with the Metal backend" ON)
option(TI_WITH_CUDA "Build with the CUDA backend" ON)
option(TI_WITH_CUDA_TOOLKIT "Build with the CUDA toolkit" OFF)
option(TI_WITH_AMDGPU "Build with the AMDGPU backend" OFF)
option(TI_WITH_OPENGL "Build with the OpenGL backend" ON)
option(TI_WITH_CC "Build with the C backend" ON)
option(TI_WITH_VULKAN "Build with the Vulkan backend" OFF)
Expand Down Expand Up @@ -53,13 +54,21 @@ if (APPLE)
set(TI_WITH_CC OFF)
message(WARNING "C backend not supported on OS X. Setting TI_WITH_CC to OFF.")
endif()
if (TI_WITH_AMDGPU)
set(TI_WITH_AMDGPU OFF)
message(WARNING "AMDGPU backend not supported on OS X. Setting TI_WITH_AMDGPU to OFF.")
endif()
endif()

if (WIN32)
if (TI_WITH_CC)
set(TI_WITH_CC OFF)
message(WARNING "C backend not supported on Windows. Setting TI_WITH_CC to OFF.")
endif()
if (TI_WITH_AMDGPU)
set(TI_WITH_AMDGPU OFF)
message(WARNING "AMDGPU backend not supported on Windows. Setting TI_WITH_AMDGPU to OFF.")
endif()
endif()

if(TI_WITH_VULKAN)
Expand Down Expand Up @@ -97,6 +106,12 @@ if(TI_WITH_LLVM)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_LLVM")
endif()

if (TI_LLVM_15)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_LLVM_15")
else()
set(TI_WITH_DX12 OFF)
endif()

## This version var is only used to locate slim_libdevice.10.bc
if(NOT CUDA_VERSION)
set(CUDA_VERSION 10.0)
Expand All @@ -108,6 +123,12 @@ if (TI_WITH_CUDA)
list(APPEND TAICHI_CORE_SOURCE ${TAICHI_CUDA_RUNTIME_SOURCE})
endif()

if (TI_WITH_AMDGPU)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_AMDGPU")
  # file(GLOB TAICHI_AMDGPU_RUNTIME_SOURCE "taichi/runtime/amdgpu/runtime.cpp")
  # Fix: was `TAIHI_CORE_SOURCE` (typo), which appended the AMDGPU runtime
  # sources to a dead variable nothing else reads, silently dropping them
  # from the build once the glob above is re-enabled.
  list(APPEND TAICHI_CORE_SOURCE ${TAICHI_AMDGPU_RUNTIME_SOURCE})
endif()

if (TI_WITH_DX12)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_DX12")
endif()
Expand All @@ -119,6 +140,10 @@ if (TI_WITH_CC)
list(APPEND TAICHI_CORE_SOURCE ${TAICHI_CC_SOURCE})
endif()

# This compiles all the libraries with -fPIC, which is critical to link a static
# library into a shared lib.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(CORE_LIBRARY_NAME taichi_core)
add_library(${CORE_LIBRARY_NAME} OBJECT ${TAICHI_CORE_SOURCE})

Expand Down Expand Up @@ -215,6 +240,12 @@ if(TI_WITH_LLVM)
target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE cuda_rhi)
endif()

if (TI_WITH_AMDGPU)
# Resolve the LLVM AMDGPU backend component to its library names.
# NOTE(review): llvm_amdgpu_libs is computed here but never passed to
# target_link_libraries — either link it (as the CUDA/DX12 branches
# presumably do with their lists) or drop this call; confirm intent.
llvm_map_components_to_libnames(llvm_amdgpu_libs AMDGPU)
# Build the AMDGPU RHI and link it into the core object library.
add_subdirectory(taichi/rhi/amdgpu)
target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE amdgpu_rhi)
endif()

if (TI_WITH_DX12)
llvm_map_components_to_libnames(llvm_directx_libs DirectX)

Expand Down Expand Up @@ -321,19 +352,12 @@ endif()
if (TI_WITH_VULKAN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_VULKAN")
if (APPLE)
# The latest Molten-vk v1.2.0 and v1.1.11 breaks GGUI: mpm3d_ggui.py
# So we have to manually download and install Molten-vk v1.10.0
#
# Uncomment the following lines if the mpm3d_ggui.py runs well with the latest Molten-vk
#find_library(MOLTEN_VK libMoltenVK.dylib PATHS $HOMEBREW_CELLAR/molten-vk $VULKAN_SDK REQUIRED)
#configure_file(${MOLTEN_VK} ${CMAKE_BINARY_DIR}/libMoltenVK.dylib COPYONLY)
#message(STATUS "MoltenVK library ${MOLTEN_VK}")

if(NOT EXISTS ${CMAKE_BINARY_DIR}/libMoltenVK.dylib)
execute_process(COMMAND curl -L -o ${CMAKE_BINARY_DIR}/libMoltenVK.zip https://github.com/taichi-dev/taichi_assets/files/9977436/libMoltenVK.dylib.zip)
execute_process(COMMAND tar -xf ${CMAKE_BINARY_DIR}/libMoltenVK.zip --directory ${CMAKE_BINARY_DIR})
find_library(MOLTEN_VK libMoltenVK.dylib PATHS $HOMEBREW_CELLAR/molten-vk $VULKAN_SDK REQUIRED)
configure_file(${MOLTEN_VK} ${CMAKE_BINARY_DIR}/libMoltenVK.dylib COPYONLY)
message(STATUS "MoltenVK library ${MOLTEN_VK}")
if (EXISTS ${CMAKE_BINARY_DIR}/libMoltenVK.dylib)
install(FILES ${CMAKE_BINARY_DIR}/libMoltenVK.dylib DESTINATION ${INSTALL_LIB_DIR}/runtime)
endif()
install(FILES ${CMAKE_BINARY_DIR}/libMoltenVK.dylib DESTINATION ${INSTALL_LIB_DIR}/runtime)
endif()
add_subdirectory(taichi/rhi/vulkan)
add_subdirectory(taichi/runtime/program_impls/vulkan)
Expand Down Expand Up @@ -428,13 +452,6 @@ if(TI_WITH_PYTHON)
target_link_options(${CORE_WITH_PYBIND_LIBRARY_NAME} PUBLIC -Wl,--exclude-libs=ALL)
endif()

if (TI_WITH_BACKTRACE)
# Defined by external/backward-cpp:
# This will add libraries, definitions and include directories needed by backward
# by setting each property on the target.
target_link_libraries(${CORE_WITH_PYBIND_LIBRARY_NAME} PRIVATE ${BACKWARD_ENABLE})
endif()

if(TI_WITH_GGUI)
target_compile_definitions(${CORE_WITH_PYBIND_LIBRARY_NAME} PRIVATE -DTI_WITH_GGUI)
target_link_libraries(${CORE_WITH_PYBIND_LIBRARY_NAME} PRIVATE taichi_ui_vulkan)
Expand Down
2 changes: 1 addition & 1 deletion external/SPIRV-Tools
Submodule SPIRV-Tools updated 234 files
22 changes: 22 additions & 0 deletions taichi/rhi/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# ./taichi/rhi/amdgpu/CMakeLists.txt
#
# Builds the AMDGPU render-hardware-interface (RHI) library.

set(AMDGPU_RHI amdgpu_rhi)

# List sources directly in add_library instead of a separate
# target_sources() call — same target, same source set.
add_library(${AMDGPU_RHI}
  amdgpu_caching_allocator.cpp
  amdgpu_context.cpp
  amdgpu_device.cpp
  amdgpu_driver.cpp
)

target_include_directories(${AMDGPU_RHI}
  PRIVATE
    ${PROJECT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/external/eigen
    ${PROJECT_SOURCE_DIR}/external/spdlog/include
    ${PROJECT_SOURCE_DIR}/external/glfw/include
    ${LLVM_INCLUDE_DIRS}
)

target_link_libraries(${AMDGPU_RHI} PRIVATE interop_rhi)
40 changes: 40 additions & 0 deletions taichi/rhi/amdgpu/amdgpu_caching_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include "taichi/rhi/amdgpu/amdgpu_caching_allocator.h"

namespace taichi {
namespace lang {
namespace amdgpu {

// Construct a caching allocator that services cache misses through `device`.
AmdgpuCachingAllocator::AmdgpuCachingAllocator(LlvmDevice *device)
    : device_(device) {}

// Serve an allocation request, preferring a cached block over a fresh
// driver allocation.  Best-fit: the smallest pooled block whose size is
// >= the page-aligned request is taken; any page-aligned tail is split
// off and returned to the pool.  Falls back to a new JIT allocation
// when no pooled block fits.
uint64_t *AmdgpuCachingAllocator::allocate(
    const LlvmDevice::LlvmRuntimeAllocParams &params) {
  // Requests are page-granular; pooled block sizes are too.
  const auto aligned_size = taichi::iroundup(params.size, taichi_page_size);
  const auto found = mem_blocks_.lower_bound(aligned_size);

  if (found == mem_blocks_.end()) {
    // Cache miss: nothing pooled is large enough.
    return device_->allocate_llvm_runtime_memory_jit(params);
  }

  uint64_t *block = found->second;
  const size_t leftover = found->first - aligned_size;
  if (leftover > 0) {
    // Split off the unused tail and keep it available for reuse.
    TI_ASSERT(leftover % taichi_page_size == 0);
    auto *tail = reinterpret_cast<uint8_t *>(block) + aligned_size;
    mem_blocks_.insert({leftover, reinterpret_cast<uint64_t *>(tail)});
  }
  mem_blocks_.erase(found);
  return block;
}

// Return a block to the pool so allocate() can reuse it.
// NOTE(review): assumes `sz` is the page-aligned size the block was
// handed out with — a smaller value would leak the tail; confirm callers.
void AmdgpuCachingAllocator::release(size_t sz, uint64_t *ptr) {
  mem_blocks_.emplace(sz, ptr);
}

} // namespace amdgpu
} // namespace lang
} // namespace taichi
27 changes: 27 additions & 0 deletions taichi/rhi/amdgpu/amdgpu_caching_allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#pragma once

#include "taichi/common/core.h"
#include "taichi/math/arithmetic.h"
#include "taichi/rhi/llvm/llvm_device.h"
#include <stdint.h>
#include <map>

namespace taichi {
namespace lang {
namespace amdgpu {

// Best-fit caching allocator for AMDGPU device memory.  Released blocks
// are kept in a size-ordered pool and handed back by allocate() instead
// of round-tripping through the driver.  Not thread-safe by itself —
// presumably guarded by the caller; confirm at the use sites.
class AmdgpuCachingAllocator {
public:
// `device` performs the real allocations on cache misses; the allocator
// does not take ownership of it.
AmdgpuCachingAllocator(LlvmDevice *device);

// Returns a device pointer for `params.size` bytes (rounded up to whole
// pages), reusing a pooled block when one fits.
uint64_t *allocate(const LlvmDevice::LlvmRuntimeAllocParams &params);
// Puts `ptr` (of size `sz` bytes) back into the pool for reuse.
void release(size_t sz, uint64_t *ptr);

private:
// Free pool keyed by block size; multimap because several freed blocks
// may share a size.
std::multimap<size_t, uint64_t *> mem_blocks_;
LlvmDevice *device_{nullptr};
};

} // namespace amdgpu
} // namespace lang
} // namespace taichi
93 changes: 93 additions & 0 deletions taichi/rhi/amdgpu/amdgpu_context.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#define TI_RUNTIME_HOST
#include "amdgpu_context.h"

#include <unordered_map>
#include <mutex>

#include "taichi/util/lang_util.h"
#include "taichi/program/program.h"
#include "taichi/system/threading.h"
#include "taichi/rhi/amdgpu/amdgpu_driver.h"
#include "taichi/analysis/offline_cache_util.h"

namespace taichi {
namespace lang {

// Initializes the HIP driver, binds device 0, creates a context on it,
// and derives the target mcpu string (e.g. "gfx906") from the device
// properties for later code generation.
AMDGPUContext::AMDGPUContext()
: driver_(AMDGPUDriver::get_instance_without_context()) {
dev_count_ = 0;
driver_.init(0);
driver_.device_get_count(&dev_count_);
// Only device 0 is used; multi-device selection is not supported here.
driver_.device_get(&device_, 0);

char name[128];
driver_.device_get_name(name, 128, device_);

TI_TRACE("Using AMDGPU device [id=0]: {}", name);

driver_.context_create(&context_, 0, device_);

const auto GB = std::pow(1024.0, 3.0);
TI_TRACE("Total memory {:.2f} GB; free memory {:.2f} GB",
get_total_memory() / GB, get_free_memory() / GB);

// Read the GCN arch number out of the raw hipDeviceProp_t blob by byte
// offset.  NOTE(review): HIP_DEVICE_GCN_ARCH is an offset in ints, not
// bytes, given the `(int *) + offset` arithmetic — confirm it matches
// the struct layout of the HIP version pinned by the build.
void *hip_device_prop = std::malloc(HIP_DEVICE_PROPERTIES_STRUCT_SIZE);
driver_.device_get_prop(hip_device_prop, device_);
compute_capability_ = *((int *)hip_device_prop + HIP_DEVICE_GCN_ARCH);
std::free(hip_device_prop);

mcpu_ = fmt::format("gfx{}", compute_capability_);

TI_TRACE("Emitting AMDGPU code for {}", mcpu_);
}

// Total device memory in bytes, as reported by the driver.
std::size_t AMDGPUContext::get_total_memory() {
  std::size_t total{0};
  std::size_t free_unused{0};
  driver_.mem_get_info(&free_unused, &total);
  return total;
}

// Currently free device memory in bytes, as reported by the driver.
std::size_t AMDGPUContext::get_free_memory() {
  std::size_t free_bytes{0};
  std::size_t total_unused{0};
  driver_.mem_get_info(&free_bytes, &total_unused);
  return free_bytes;
}

// Human-readable name of the bound device (device 0), queried from the
// driver on every call.
std::string AMDGPUContext::get_device_name() {
  constexpr uint32_t kNameBufferSize = 128;
  char buffer[kNameBufferSize];
  driver_.device_get_name(buffer, kNameBufferSize, device_);
  return std::string(buffer);
}

// Launches a compiled kernel `func` with the given grid/block geometry,
// passing the kernel arguments as a packed buffer via the driver's
// extra-config mechanism.  A zero grid is a no-op.  Serialized by lock_.
// NOTE(review): `task_name` is currently unused — confirm whether it is
// reserved for profiling/tracing in a later part of this PR series.
void AMDGPUContext::launch(void *func,
const std::string &task_name,
void *arg_pointers,
unsigned grid_dim,
unsigned block_dim,
std::size_t dynamic_shared_mem_bytes,
int arg_bytes) {
if (grid_dim > 0) {
std::lock_guard<std::mutex> _(lock_);
// The 0x01/0x02/0x03 sentinels presumably correspond to the HIP
// HIP_LAUNCH_PARAM_BUFFER_POINTER / _BUFFER_SIZE / _END markers —
// confirm against hip_runtime_api.h for the pinned HIP version.
void *config[] = {(void *)0x01, const_cast<void *>(arg_pointers),
(void *)0x02, &arg_bytes, (void *)0x03};
// `&config` reinterpreted as void** points at config[0], i.e. the
// same address the array decays to.
driver_.launch_kernel(func, grid_dim, 1, 1, block_dim, 1, 1,
dynamic_shared_mem_bytes, nullptr, nullptr,
reinterpret_cast<void **>(&config));
}
// In debug mode, block until the kernel finishes so errors surface at
// the launch site rather than at some later sync point.
if (debug_) {
driver_.stream_synchronize(nullptr);
}
}

// No explicit teardown: the context is a leaked singleton (see
// get_instance) and intentionally outlives the program.
AMDGPUContext::~AMDGPUContext() = default;

// Process-wide singleton.  Deliberately heap-allocated and never freed
// so the context is immune to static-destruction-order issues at exit.
AMDGPUContext &AMDGPUContext::get_instance() {
  static AMDGPUContext *instance = new AMDGPUContext();
  return *instance;
}

} // namespace lang
} // namespace taichi
Loading