[ci] [amdgpu] Enable amdgpu backend python unit tests (taichi-dev#7293)
Issue: taichi-dev#6434

### Brief Summary
1. Fix AMDGPU backend bugs:
    a. fix codegen typos;
    b. add support for more types in `sgn` (a minimal select-based sketch follows this summary);
    c. work around the `runtime.cpp` build error with temporary `#ifdef ARCH_amdgpu` guards.
2. Enable the AMDGPU backend Python unit tests:
    a. `print` is not supported yet, so all `print`-related tests are temporarily disabled on the AMDGPU backend;
    b. the `gdar_mpm` and `ad_if` tests still fail on AMDGPU and remain excluded for now.
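
As a rough illustration of item 1b, here is a hedged sketch of what the new floating-point `sgn` lowering computes. The helper name and the `select`-based formulation are hypothetical; the committed codegen in `codegen_amdgpu.cpp` below builds explicit basic blocks instead and carries a TODO to simplify it.

```cpp
#include "llvm/IR/IRBuilder.h"

// Sketch only: sgn(x) = -1 if x < 0, +1 if x > 0, else 0 (f64 flavour).
// Not the committed code; the real lowering uses compare + branch + store.
llvm::Value *emit_sgn_f64_sketch(llvm::IRBuilder<> &builder, llvm::Value *x) {
  auto *f64 = x->getType();  // expected to be LLVM's double type
  auto *zero = llvm::ConstantFP::get(f64, 0.0);
  auto *one = llvm::ConstantFP::get(f64, 1.0);
  auto *neg_one = llvm::ConstantFP::get(f64, -1.0);
  auto *is_neg = builder.CreateFCmpOLT(x, zero);
  auto *is_pos = builder.CreateFCmpOGT(x, zero);
  auto *pos_or_zero = builder.CreateSelect(is_pos, one, zero);
  return builder.CreateSelect(is_neg, neg_one, pos_or_zero);
}
```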

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2 people authored and quadpixels committed May 13, 2023
1 parent ede3b62 commit b9e15a0
Showing 19 changed files with 136 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/scripts/unix_test.sh
@@ -123,7 +123,7 @@ if [ -z "$GPU_TEST" ]; then
fi
elif [ ! -z "$AMDGPU_TEST" ]; then
run-it cpu $(nproc)
# run-it amdgpu 4
run-it amdgpu 8
else
run-it cuda 8
run-it cpu $(nproc)
58 changes: 55 additions & 3 deletions taichi/codegen/amdgpu/codegen_amdgpu.cpp
@@ -84,7 +84,8 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
} else {
TI_NOT_IMPLEMENTED
}
} else if (op == UnaryOpType::sgn) {
} // TODO simplify the impl of sgn
else if (op == UnaryOpType::sgn) {
if (input_taichi_type->is_primitive(PrimitiveTypeID::i32)) {
auto ashr = builder->CreateAShr(input, 31);
auto sub = builder->CreateSub(0, input);
@@ -141,6 +142,57 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
builder->SetInsertPoint(bb_merge);
llvm_val[stmt] =
builder->CreateLoad(llvm::Type::getFloatTy(*llvm_context), cast);
} else if (input_taichi_type->is_primitive(PrimitiveTypeID::f64)) {
auto func = builder->GetInsertBlock()->getParent();
auto bb_oeq_then = BasicBlock::Create(*llvm_context, "oeq_then", func);
auto bb_oeq_else = BasicBlock::Create(*llvm_context, "oeq_else");
auto bb_merge = BasicBlock::Create(*llvm_context, "merge");
auto bb_olt_then = BasicBlock::Create(*llvm_context, "olt_then", func);
auto bb_olt_else = BasicBlock::Create(*llvm_context, "olt_else");

auto alloc = builder->CreateAlloca(
llvm::Type::getDoubleTy(*llvm_context), (unsigned)5);
auto newty = llvm::PointerType::get(
llvm::Type::getDoubleTy(*llvm_context), (unsigned)0);
auto cast = builder->CreateAddrSpaceCast(alloc, newty);
auto fcmp_oeq = builder->CreateFCmpOEQ(
input,
llvm::ConstantFP::get(llvm::Type::getDoubleTy(*llvm_context), 0));
builder->CreateCondBr(fcmp_oeq, bb_oeq_then, bb_oeq_else);
builder->SetInsertPoint(bb_oeq_then);
builder->CreateStore(
llvm::ConstantFP::get(llvm::Type::getDoubleTy(*llvm_context), 0),
cast);
builder->CreateBr(bb_merge);
bb_oeq_then = builder->GetInsertBlock();

func->getBasicBlockList().push_back(bb_oeq_else);
builder->SetInsertPoint(bb_oeq_else);
auto fcmp_olt = builder->CreateFCmpOLT(
input,
llvm::ConstantFP::get(llvm::Type::getDoubleTy(*llvm_context), 0));
builder->CreateCondBr(fcmp_olt, bb_olt_then, bb_olt_else);
bb_oeq_else = builder->GetInsertBlock();

builder->SetInsertPoint(bb_olt_then);
builder->CreateStore(
llvm::ConstantFP::get(llvm::Type::getDoubleTy(*llvm_context), -1),
cast);
builder->CreateBr(bb_merge);
bb_olt_then = builder->GetInsertBlock();

func->getBasicBlockList().push_back(bb_olt_else);
builder->SetInsertPoint(bb_olt_else);
builder->CreateStore(
llvm::ConstantFP::get(llvm::Type::getDoubleTy(*llvm_context), 1),
cast);
builder->CreateBr(bb_merge);
bb_olt_else = builder->GetInsertBlock();

func->getBasicBlockList().push_back(bb_merge);
builder->SetInsertPoint(bb_merge);
llvm_val[stmt] =
builder->CreateLoad(llvm::Type::getDoubleTy(*llvm_context), cast);
}
}
UNARY_STD(cos)
@@ -390,7 +442,7 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
llvm_val[stmt] = call("__ocml_pow_f16", {lhs, rhs});
} else if (ret_taichi_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = call("__ocml_pow_f32", {lhs, rhs});
} else if (ret_taichi_type->is_primitive(PrimitiveTypeID::i64)) {
} else if (ret_taichi_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = call("__ocml_pow_f64", {lhs, rhs});
} else if (ret_taichi_type->is_primitive(PrimitiveTypeID::i32)) {
auto sitofp_lhs_ =
@@ -408,7 +460,7 @@ class TaskCodeGenAMDGPU : public TaskCodeGenLLVM {
llvm_val[stmt] = call("__ocml_atan2_f16", {lhs, rhs});
} else if (ret_taichi_type->is_primitive(PrimitiveTypeID::f32)) {
llvm_val[stmt] = call("__ocml_atan2_f32", {lhs, rhs});
} else if (ret_taichi_type->is_primitive(PrimitiveTypeID::i64)) {
} else if (ret_taichi_type->is_primitive(PrimitiveTypeID::f64)) {
llvm_val[stmt] = call("__ocml_atan2_f64", {lhs, rhs});
} else {
TI_NOT_IMPLEMENTED
1 change: 1 addition & 0 deletions taichi/program/extension.cpp
@@ -19,6 +19,7 @@ bool is_extension_supported(Arch arch, Extension ext) {
{Extension::sparse, Extension::quant, Extension::quant_basic,
Extension::data64, Extension::adstack, Extension::bls,
Extension::assertion, Extension::mesh}},
{Arch::amdgpu, {Extension::assertion}},
{Arch::metal, {}},
{Arch::opengl, {Extension::extfunc}},
{Arch::gles, {}},
4 changes: 2 additions & 2 deletions taichi/runtime/llvm/llvm_context.cpp
@@ -620,8 +620,8 @@ void TaichiLLVMContext::link_module_with_amdgpu_libdevice(
}

for (auto &f : libdevice_module->functions()) {
auto func_ = module->getFunction(f.getName());
if (!func_ && starts_with(f.getName().lower(), "__" + libdevice))
auto func_name = libdevice.substr(0, libdevice.length() - 3);
if (starts_with(f.getName().lower(), "__" + func_name))
f.setLinkage(llvm::Function::CommonLinkage);
}
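
For readers skimming the diff, a small hedged sketch of the prefix derivation the new check above relies on; `"ocml.bc"` is only an illustrative module name, not necessarily what the caller passes in.

```cpp
#include <iostream>
#include <string>

int main() {
  std::string libdevice = "ocml.bc";  // illustrative libdevice module name
  // Same derivation as the new code above: drop the trailing ".bc" and
  // prepend "__", giving the prefix shared by that library's functions.
  std::string func_name = libdevice.substr(0, libdevice.length() - 3);
  std::string prefix = "__" + func_name;  // "__ocml"
  std::cout << prefix << "\n";
  // In link_module_with_amdgpu_libdevice, every libdevice function whose
  // lowercased name starts with this prefix (e.g. "__ocml_pow_f64") is set
  // to CommonLinkage.
  return 0;
}
```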

21 changes: 20 additions & 1 deletion taichi/runtime/llvm/runtime_module/runtime.cpp
@@ -760,8 +760,15 @@ void taichi_assert_format(LLVMRuntime *runtime,
const char *format,
int num_arguments,
uint64 *arguments) {
#ifdef ARCH_amdgpu
// TODO: find out why error with mark_force_no_inline
// llvm::SDValue llvm::SelectionDAG::getNode(unsigned int, const llvm::SDLoc
// &, llvm::EVT, llvm::SDValue, const llvm::SDNodeFlags): Assertion
// `VT.getSizeInBits() == Operand.getValueSizeInBits() && "Cannot BITCAST
// between types of different sizes!"' failed.
#else
mark_force_no_inline();

#endif
if (!enable_assert || test != 0)
return;
if (!runtime->error_code) {
@@ -1510,7 +1517,13 @@ void gpu_parallel_range_for(RuntimeContext *context,
range_for_xlogue epilogue,
const std::size_t tls_size) {
int idx = thread_idx() + block_dim() * block_idx() + begin;
#ifdef ARCH_amdgpu
// AMDGPU doesn't support dynamic array
// TODO: find a better way to set the tls_size (maybe like struct_for
alignas(8) char tls_buffer[64];
#else
alignas(8) char tls_buffer[tls_size];
#endif
auto tls_ptr = &tls_buffer[0];
if (prologue)
prologue(context, tls_ptr);
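
The fixed `tls_buffer[64]` above is an explicit stopgap: the AMDGPU device path rejects the variable-length array used on other backends. A hedged sketch of one possible cleanup, assuming a compile-time upper bound is acceptable; the constant name and guard are hypothetical and not part of this commit.

```cpp
#include <cstddef>

constexpr std::size_t kAMDGPUMaxTLSSize = 64;  // hypothetical named bound

void range_for_tls_sketch(std::size_t tls_size) {
  // A VLA (`char tls_buffer[tls_size]`) is not accepted on the AMDGPU device
  // path, so the buffer size must be a compile-time constant.
  alignas(8) char tls_buffer[kAMDGPUMaxTLSSize];
  char *tls_ptr = &tls_buffer[0];
  // A device-safe check that tls_size <= kAMDGPUMaxTLSSize would be needed
  // before trusting the fixed bound; omitted here because this runtime path
  // has no portable assert.
  (void)tls_size;
  (void)tls_ptr;
}
```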
@@ -1588,7 +1601,13 @@ void gpu_parallel_mesh_for(RuntimeContext *context,
MeshForTaskFunc *func,
mesh_for_xlogue epilogue,
const std::size_t tls_size) {
#ifdef ARCH_amdgpu
// AMDGPU doesn't support dynamic array
// TODO: find a better way to set the tls_size (maybe like struct_for
alignas(8) char tls_buffer[64];
#else
alignas(8) char tls_buffer[tls_size];
#endif
auto tls_ptr = &tls_buffer[0];
for (int idx = block_idx(); idx < num_patches; idx += grid_dim()) {
if (prologue)
5 changes: 4 additions & 1 deletion tests/python/test_ad_gdar_diffmpm.py
@@ -4,7 +4,10 @@
from tests import test_utils


@test_utils.test(require=ti.extension.assertion, debug=True, exclude=[ti.cc])
# FIXME: gdar mpm on amdgpu backend(assign gale)
@test_utils.test(require=ti.extension.assertion,
debug=True,
exclude=[ti.cc, ti.amdgpu])
def test_gdar_mpm():
real = ti.f32

3 changes: 2 additions & 1 deletion tests/python/test_ad_if.py
@@ -244,7 +244,8 @@ def func():
func()


@test_utils.test()
#FIXME: amdgpu backend(assign gale)
@test_utils.test(exclude=ti.amdgpu)
def test_if_condition_depend_on_for_loop_index():
scalar = lambda: ti.field(dtype=ti.f32)
vec = lambda: ti.Vector.field(3, dtype=ti.f32)
3 changes: 1 addition & 2 deletions tests/python/test_cfg_continue.py
@@ -2,7 +2,7 @@
from tests import test_utils


@test_utils.test()
@test_utils.test(exclude=[ti.amdgpu])
def test_cfg_continue():
x = ti.field(dtype=int, shape=1)
state = ti.field(dtype=int, shape=1)
@@ -13,7 +13,6 @@ def foo():
if state[p] == 0:
x[p] = 1
continue

if state[p] != 0:
print('test')

4 changes: 3 additions & 1 deletion tests/python/test_cli.py
@@ -213,7 +213,9 @@ def test_cli_run():


def test_cli_cache():
archs = {ti.cpu, ti.cuda, ti.opengl, ti.vulkan, ti.metal, ti.gles}
archs = {
ti.cpu, ti.cuda, ti.opengl, ti.vulkan, ti.metal, ti.gles, ti.amdgpu
}
archs = {v for v in archs if v in test_utils.expected_archs()}
exts = ('ll', 'bc', 'spv', 'metal', 'tcb', 'lock')
tmp_path = tempfile.mkdtemp()
4 changes: 2 additions & 2 deletions tests/python/test_debug.py
@@ -66,7 +66,7 @@ def func():
func()


@test_utils.test(require=ti.extension.assertion,
@test_utils.test(require=[ti.extension.sparse, ti.extension.assertion],
debug=True,
gdb_trigger=False,
exclude=ti.metal)
@@ -83,7 +83,7 @@ def func():
func()


@test_utils.test(require=ti.extension.assertion,
@test_utils.test(require=[ti.extension.sparse, ti.extension.assertion],
debug=True,
gdb_trigger=False,
exclude=ti.metal)
27 changes: 16 additions & 11 deletions tests/python/test_internal_func.py
@@ -6,8 +6,9 @@
from tests import test_utils


@test_utils.test(
exclude=[ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc])
@test_utils.test(exclude=[
ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc, ti.amdgpu
])
def test_basic():
@ti.kernel
def test():
@@ -17,8 +18,9 @@ def test():
test()


@test_utils.test(
exclude=[ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc])
@test_utils.test(exclude=[
ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc, ti.amdgpu
])
def test_host_polling():
return

@@ -32,8 +34,9 @@ def test():
time.sleep(0.1)


@test_utils.test(
exclude=[ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc])
@test_utils.test(exclude=[
ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc, ti.amdgpu
])
def test_list_manager():
@ti.kernel
def test():
@@ -43,8 +46,9 @@ def test():
test()


@test_utils.test(
exclude=[ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc])
@test_utils.test(exclude=[
ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc, ti.amdgpu
])
def test_node_manager():
@ti.kernel
def test():
@@ -54,8 +58,9 @@ def test():
test()


@test_utils.test(
exclude=[ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc])
@test_utils.test(exclude=[
ti.metal, ti.opengl, ti.gles, ti.cuda, ti.vulkan, ti.cc, ti.amdgpu
])
def test_node_manager_gc():
@ti.kernel
def test_cpu():
@@ -64,7 +69,7 @@ def test_cpu():
test_cpu()


@test_utils.test(arch=[ti.cpu, ti.cuda], debug=True)
@test_utils.test(arch=[ti.cpu, ti.cuda, ti.amdgpu], debug=True)
def test_return():
@ti.kernel
def test_cpu():
2 changes: 1 addition & 1 deletion tests/python/test_lang.py
@@ -94,7 +94,7 @@ def test():
test()


@test_utils.test()
@test_utils.test(exclude=[ti.amdgpu])
def test_local_atomics():
n = 32
val = ti.field(ti.i32, shape=n)
2 changes: 1 addition & 1 deletion tests/python/test_native_functions.py
@@ -5,7 +5,7 @@
from tests import test_utils


@test_utils.test()
@test_utils.test(exclude=[ti.amdgpu])
def test_abs():
x = ti.field(ti.f32)

2 changes: 1 addition & 1 deletion tests/python/test_ndarray.py
@@ -19,7 +19,7 @@
vector_dims = [3]
matrix_dims = [(1, 2), (2, 3)]
supported_archs_taichi_ndarray = [
ti.cpu, ti.cuda, ti.opengl, ti.vulkan, ti.metal
ti.cpu, ti.cuda, ti.opengl, ti.vulkan, ti.metal, ti.amdgpu
]


2 changes: 1 addition & 1 deletion tests/python/test_ndrange.py
@@ -317,7 +317,7 @@ def example():
example()


@test_utils.test()
@test_utils.test(exclude=[ti.amdgpu])
def test_n_loop_var_neq_dimension():
@ti.kernel
def iter():
2 changes: 1 addition & 1 deletion tests/python/test_non_taichi_types_in_kernel.py
@@ -2,7 +2,7 @@
from tests import test_utils


@test_utils.test()
@test_utils.test(exclude=ti.amdgpu)
def test_subscript_user_classes_in_kernel():
class MyList:
def __init__(self, elements):
4 changes: 2 additions & 2 deletions tests/python/test_offload_cross.py
@@ -111,7 +111,7 @@ def ker():
assert ret[None] == 46


@test_utils.test()
@test_utils.test(exclude=ti.amdgpu)
def test_offload_with_cross_nested_for():
@ti.kernel
def run(a: ti.i32):
Expand All @@ -123,7 +123,7 @@ def run(a: ti.i32):
run(2)


@test_utils.test()
@test_utils.test(exclude=ti.amdgpu)
def test_offload_with_cross_if_inside_for():
@ti.kernel
def run(a: ti.i32):