diff --git a/CMakeLists.txt b/CMakeLists.txt
index da6c106d64d0de..98a4016a223623 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -121,6 +121,10 @@ if (TI_WITH_CUDA)
     set(CUDA_ARCH "cuda")
 endif()
 
+if (TI_WITH_AMDGPU)
+    set(AMDGPU_ARCH "amdgpu")
+endif()
+
 if (TI_WITH_DX12)
     set(DX12_ARCH "dx12")
 endif()
diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake
index 1b9f912cc6174a..4a0cff8cf91708 100644
--- a/cmake/TaichiCore.cmake
+++ b/cmake/TaichiCore.cmake
@@ -4,6 +4,7 @@ option(TI_LLVM_15 "Switch to LLVM 15" OFF)
 option(TI_WITH_METAL "Build with the Metal backend" ON)
 option(TI_WITH_CUDA "Build with the CUDA backend" ON)
 option(TI_WITH_CUDA_TOOLKIT "Build with the CUDA toolkit" OFF)
+option(TI_WITH_AMDGPU "Build with the AMDGPU backend" OFF)
 option(TI_WITH_OPENGL "Build with the OpenGL backend" ON)
 option(TI_WITH_CC "Build with the C backend" ON)
 option(TI_WITH_VULKAN "Build with the Vulkan backend" OFF)
@@ -35,6 +36,12 @@ if(ANDROID)
     set(TI_WITH_DX12 OFF)
 endif()
 
+if(AMDGPU)
+    set(TI_WITH_LLVM ON)
+    set(TI_WITH_CUDA OFF)
+    set(TI_WITH_AMDGPU ON)
+endif()
+
 if(UNIX AND NOT APPLE)
     # Handy helper for Linux
     # https://stackoverflow.com/a/32259072/12003165
@@ -54,6 +61,10 @@ if (APPLE)
         set(TI_WITH_CC OFF)
         message(WARNING "C backend not supported on OS X. Setting TI_WITH_CC to OFF.")
     endif()
+    if (TI_WITH_AMDGPU)
+        set(TI_WITH_AMDGPU OFF)
+        message(WARNING "AMDGPU backend not supported on OS X. Setting TI_WITH_AMDGPU to OFF.")
+    endif()
 endif()
 
 if (WIN32)
@@ -61,6 +72,10 @@ if (WIN32)
         set(TI_WITH_CC OFF)
         message(WARNING "C backend not supported on Windows. Setting TI_WITH_CC to OFF.")
     endif()
+    if (TI_WITH_AMDGPU)
+        set(TI_WITH_AMDGPU OFF)
+        message(WARNING "AMDGPU backend not supported on Windows. Setting TI_WITH_AMDGPU to OFF.")
+    endif()
 endif()
 
 if(TI_WITH_VULKAN)
@@ -226,6 +241,17 @@ if(TI_WITH_LLVM)
         target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE cuda_rhi)
     endif()
 
+    if (TI_WITH_AMDGPU)
+        llvm_map_components_to_libnames(llvm_amdgpu_libs AMDGPU)
+        # add_subdirectory(taichi/codegen/amdgpu)
+        # add_subdirectory(taichi/runtime/amdgpu)
+        add_subdirectory(taichi/rhi/amdgpu)
+
+        # target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE amdgpu_codegen)
+        # target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE amdgpu_runtime)
+        target_link_libraries(${CORE_LIBRARY_NAME} PRIVATE amdgpu_rhi)
+    endif()
+
     if (TI_WITH_DX12)
         llvm_map_components_to_libnames(llvm_directx_libs DirectX)
diff --git a/tests/cpp/backends/amdgpu_device_test.cpp b/tests/cpp/backends/amdgpu_device_test.cpp
new file mode 100644
index 00000000000000..c2bb24ae7ea05c
--- /dev/null
+++ b/tests/cpp/backends/amdgpu_device_test.cpp
@@ -0,0 +1,102 @@
+#include "gtest/gtest.h"
+
+#ifdef TI_WITH_AMDGPU
+#include "taichi/ir/ir_builder.h"
+#include "taichi/rhi/amdgpu/amdgpu_driver.h"
+#include "taichi/rhi/amdgpu/amdgpu_context.h"
+#include "taichi/rhi/amdgpu/amdgpu_device.h"
+#include "tests/cpp/program/test_program.h"
+
+namespace taichi {
+namespace lang {
+TEST(AMDGPU, CreateDeviceAndAlloc) {
+  std::unique_ptr<amdgpu::AmdgpuDevice> device =
+      std::make_unique<amdgpu::AmdgpuDevice>();
+  EXPECT_TRUE(device != nullptr);
+  taichi::lang::Device::AllocParams params;
+  params.size = 1048576;
+  params.host_read = false;
+  params.host_write = false;
+  const taichi::lang::DeviceAllocation device_alloc =
+      device->allocate_memory(params);
+
+  // The purpose of the device_alloc_guard is to rule out double free
+  const taichi::lang::DeviceAllocationGuard device_alloc_guard(device_alloc);
+  // Map to CPU, write some values, then check those values
+  void *mapped = device->map(device_alloc);
+  int *mapped_int = reinterpret_cast<int *>(mapped);
+  for (int i = 0; i < 100; i++) {
+    mapped_int[i] = i;
+  }
+  device->unmap(device_alloc);
+
+  mapped = device->map(device_alloc);
+  mapped_int = reinterpret_cast<int *>(mapped);
+  for (int i = 0; i < 100; i++) {
+    EXPECT_EQ(mapped_int[i], i);
+  }
+  device->unmap(device_alloc);
+}
+
+TEST(AMDGPU, ImportMemory) {
+  std::unique_ptr<amdgpu::AmdgpuDevice> device =
+      std::make_unique<amdgpu::AmdgpuDevice>();
+  EXPECT_TRUE(device != nullptr);
+
+  int *ptr = nullptr;
+  AMDGPUDriver::get_instance().malloc_managed((void **)&ptr, 400,
+                                              HIP_MEM_ATTACH_GLOBAL);
+  const taichi::lang::DeviceAllocation device_alloc =
+      device->import_memory(ptr, 400);
+
+  for (int i = 0; i < 100; i++) {
+    ptr[i] = i;
+  }
+
+  taichi::lang::Device::AllocParams params;
+  params.size = 400;
+  params.host_read = false;
+  params.host_write = false;
+  const taichi::lang::DeviceAllocation device_dest =
+      device->allocate_memory(params);
+  const taichi::lang::DeviceAllocationGuard device_dest_guard(device_dest);
+
+  AMDGPUDriver::get_instance().stream_synchronize(nullptr);
+  device->memcpy_internal(device_dest.get_ptr(0), device_alloc.get_ptr(0), 400);
+  void *mapped = device->map(device_dest);
+  int *mapped_int = reinterpret_cast<int *>(mapped);
+
+  for (int i = 0; i < 100; i++) {
+    EXPECT_EQ(mapped_int[i], i);
+  }
+  device->unmap(device_dest);
+  // import memory should been deallocated manually
+  AMDGPUDriver::get_instance().mem_free(ptr);
+}
+
+TEST(AMDGPU, CreateContextAndGetMemInfo) {
+  auto total_size = AMDGPUContext::get_instance().get_total_memory();
+  auto free_size = AMDGPUContext::get_instance().get_free_memory();
+  EXPECT_GE(total_size, free_size);
+  EXPECT_GE(free_size, 0);
+}
+
+TEST(AMDGPU, LaunchKernel) {
+  // NOT_IMPLEMENTED
+  // runtime part
+  // vec kernel
+}
+
+TEST(AMDGPU, FetchResult) {
+  // NOT_IMPLEMENTED
+  // runtime part
+  // reduce kernel
+}
+
+TEST(AMDGPU, CodeGen) {
+  // NOT_IMPLEMENTED
+}
+
+}  // namespace lang
+}  // namespace taichi
+#endif
\ No newline at end of file