Revert "[llvm] (Decomp of #5251 11/n) Enable parallel compilation on …
Browse files Browse the repository at this point in the history
…CPU backend (#5394)"

This reverts commit f6b40de.
feisuzhu committed Jul 12, 2022
1 parent 7f97f9c commit 2827db2
Showing 3 changed files with 60 additions and 111 deletions.
38 changes: 8 additions & 30 deletions taichi/codegen/cpu/codegen_cpu.cpp
@@ -302,39 +302,17 @@ FunctionType CodeGenCPU::codegen() {
     kernel->lower(/*to_executable=*/false);
   }
 
-  auto block = dynamic_cast<Block *>(kernel->ir.get());
-  auto &worker = get_llvm_program(kernel->program)->compilation_workers;
-  TI_ASSERT(block);
-
-  auto &offloads = block->statements;
-  std::vector<LLVMCompiledData> data(offloads.size());
-  using TaskFunc = int32 (*)(void *);
-  std::vector<TaskFunc> task_funcs(offloads.size());
-  for (int i = 0; i < offloads.size(); i++) {
-    auto compile_func = [&, i] {
-      auto offload =
-          irpass::analysis::clone(offloads[i].get(), offloads[i]->get_kernel());
-      irpass::re_id(offload.get());
-      auto new_data = this->modulegen(nullptr, offload->as<OffloadedStmt>());
-      data[i].tasks = std::move(new_data.tasks);
-      data[i].module = std::move(new_data.module);
-    };
-    if (kernel->is_evaluator) {
-      compile_func();
-    } else {
-      worker.enqueue(compile_func);
-    }
-  }
-  if (!kernel->is_evaluator) {
-    worker.flush();
-  }
+  CodeGenLLVMCPU gen(kernel, ir);
+  auto compiled_res = gen.run_compilation();
 
+  CPUModuleToFunctionConverter converter{gen.tlctx,
+                                         llvm_prog->get_runtime_executor()};
+  std::vector<LLVMCompiledData> data_list;
+  data_list.push_back(std::move(compiled_res));
   if (!kernel->is_evaluator) {
-    cache_module(kernel_key, data);
+    cache_module(kernel_key, data_list);
   }
 
-  CPUModuleToFunctionConverter converter(
-      tlctx, get_llvm_program(prog)->get_runtime_executor());
-  return converter.convert(kernel, std::move(data));
+  return converter.convert(this->kernel, std::move(data_list));
 }
 TLANG_NAMESPACE_END
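What this hunk reverts: the parallel path cloned each offloaded task, enqueued one compile job per offload on the program-wide compilation worker, and called flush() to wait for all jobs before caching and converting the per-offload modules; the restored path runs a single CodeGenLLVMCPU compilation for the whole kernel. The snippet below is a minimal standalone Python analogue of that enqueue/flush pattern (plain concurrent.futures, not Taichi's actual compilation_workers API; all names are hypothetical), intended only to illustrate the structure of the removed code.

# Minimal sketch of the enqueue/flush pattern removed by this revert.
# Illustrative only: plain concurrent.futures, not Taichi's
# compilation_workers API; names below are hypothetical.
from concurrent.futures import ThreadPoolExecutor, wait


def compile_offload(offload_id: int) -> str:
    # Stand-in for cloning one offloaded task and generating its module.
    return f"module_for_offload_{offload_id}"


def compile_kernel_parallel(num_offloads: int, num_threads: int = 2):
    # Results are written by index, mirroring the per-offload writes
    # into data[i] in the reverted C++ code.
    data = [None] * num_offloads

    def job(i: int) -> None:
        data[i] = compile_offload(i)

    with ThreadPoolExecutor(max_workers=num_threads) as worker:
        # "enqueue": one compile job per offloaded task.
        futures = [worker.submit(job, i) for i in range(num_offloads)]
        # "flush": block until every queued job has finished.
        wait(futures)
    return data


print(compile_kernel_parallel(4))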
2 changes: 1 addition & 1 deletion taichi/program/compile_config.cpp
@@ -58,7 +58,7 @@ CompileConfig::CompileConfig() {
print_kernel_llvm_ir = false;
print_kernel_nvptx = false;
print_kernel_llvm_ir_optimized = false;
num_compile_threads = 2;
num_compile_threads = 0;

// CUDA backend options:
device_memory_GB = 1; // by default, preallocate 1 GB GPU memory
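The only substantive change in this file restores the default num_compile_threads from 2 back to 0, in line with the removal of the parallel CPU compile path above. If this CompileConfig field is exposed through ti.init() like other config members (an assumption, not verified for this Taichi version), a user could still raise it explicitly:

import taichi as ti

# Hypothetical override of the compile-thread default; assumes
# num_compile_threads is accepted as a ti.init() keyword like other
# CompileConfig fields.
ti.init(arch=ti.cpu, num_compile_threads=2)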
131 changes: 51 additions & 80 deletions tests/python/test_offline_cache.py
@@ -5,8 +5,6 @@
from os import listdir, remove, rmdir, stat
from os.path import join
from tempfile import mkdtemp
from time import sleep
from typing import List

import pytest

@@ -38,11 +36,12 @@ def get_cache_files_size(path):
return result


def get_expected_num_cache_files(num_offloads: List[int] = None) -> int:
if not num_offloads:
def get_expected_num_cache_files(num_kernels: int) -> int:
if num_kernels == 0:
return 0
NUM_CACHE_FILES_PER_KERNEL = 1
# metadata.{json, tcb}
return 2 + sum(num_offloads)
return 2 + NUM_CACHE_FILES_PER_KERNEL * num_kernels


def tmp_offline_cache_file_path():
@@ -101,11 +100,10 @@ def python_kernel3(a, mat):


simple_kernels_to_test = [
(kernel0, (), python_kernel0, 1),
(kernel1, (100, 200, 10.2), python_kernel1, 1),
(kernel2, (1024, ), python_kernel2, 3),
(kernel3, (10, ti.Matrix([[1, 2], [256, 1024]],
ti.i32)), python_kernel3, 1),
(kernel0, (), python_kernel0),
(kernel1, (100, 200, 10.2), python_kernel1),
(kernel2, (1024, ), python_kernel2),
(kernel3, (10, ti.Matrix([[1, 2], [256, 1024]], ti.i32)), python_kernel3),
]


@@ -129,29 +127,28 @@ def wrapped(*args, **kwargs):


@_test_offline_cache_dec
def _test_offline_cache_for_a_kernel(curr_arch, kernel, args, result,
num_offloads):
def _test_offline_cache_for_a_kernel(curr_arch, kernel, args, result):
count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
res1 = kernel(*args)
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path(
))) - count_of_cache_file == get_expected_num_cache_files([num_offloads])
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(1)
res2 = kernel(*args)
assert res1 == test_utils.approx(result) and res1 == test_utils.approx(
res2)

ti.reset()
assert len(listdir(tmp_offline_cache_file_path(
))) - count_of_cache_file == get_expected_num_cache_files([num_offloads])
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(1)


@_test_offline_cache_dec
@@ -163,26 +160,26 @@ def _test_closing_offline_cache_for_a_kernel(curr_arch, kernel, args, result):
offline_cache_file_path=tmp_offline_cache_file_path())
res1 = kernel(*args)
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
offline_cache_file_path=tmp_offline_cache_file_path())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
res2 = kernel(*args)

assert res1 == test_utils.approx(result) and res1 == test_utils.approx(
res2)

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
def test_closing_offline_cache(curr_arch):
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
for kernel, args, get_res in simple_kernels_to_test:
_test_closing_offline_cache_for_a_kernel(curr_arch=curr_arch,
kernel=kernel,
args=args,
@@ -191,13 +188,11 @@ def test_closing_offline_cache(curr_arch):

@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
def test_offline_cache_per_kernel(curr_arch):
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
_test_offline_cache_for_a_kernel(
curr_arch=curr_arch,
kernel=kernel,
args=args,
result=get_res(*args),
num_offloads=num_offloads if curr_arch is ti.cpu else 1)
for kernel, args, get_res in simple_kernels_to_test:
_test_offline_cache_for_a_kernel(curr_arch=curr_arch,
kernel=kernel,
args=args,
result=get_res(*args))


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
@@ -229,18 +224,18 @@ def compute_y():
**current_thread_ext_options())
helper()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([8])
) - count_of_cache_file == get_expected_num_cache_files(8)
helper()

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([8])
) - count_of_cache_file == get_expected_num_cache_files(8)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
@@ -264,7 +259,7 @@ def np_kernel(a, b):
np_mat3 = mat3.to_numpy()

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
@@ -274,7 +269,7 @@ def np_kernel(a, b):
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([1])
) - count_of_cache_file == get_expected_num_cache_files(1)

assert (kernel(mat1, mat1).to_numpy() == np_kernel(np_mat1, np_mat1)).all()
assert (kernel(mat1, mat2).to_numpy() == np_kernel(np_mat1, np_mat2)).all()
@@ -283,7 +278,7 @@ def np_kernel(a, b):

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([1])
) - count_of_cache_file == get_expected_num_cache_files(1)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -306,7 +301,7 @@ def helper():
assert y[None] == test_utils.approx(7.28)

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
@@ -316,12 +311,12 @@ def helper():
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([4])
) - count_of_cache_file == get_expected_num_cache_files(4)
helper()

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([4])
) - count_of_cache_file == get_expected_num_cache_files(4)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -330,31 +325,27 @@ def test_calling_many_kernels(curr_arch):
count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

def helper():
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
for kernel, args, get_res in simple_kernels_to_test:
assert (kernel(*args) == test_utils.approx(get_res(*args)))

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
helper()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))
helper()
ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -371,7 +362,7 @@ def helper():
c += i

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
ti.init(arch=curr_arch,
enable_fallback=False,
default_fp=ti.f32,
@@ -383,14 +374,12 @@ def helper():
default_fp=ti.f64,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
[2] if curr_arch is ti.cpu else [1])
) - count_of_cache_file == get_expected_num_cache_files(1)
helper()

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
[2, 2] if curr_arch is ti.cpu else [1, 1])
) - count_of_cache_file == get_expected_num_cache_files(2)
ti.init(arch=curr_arch,
enable_fallback=False,
default_fp=ti.f32,
@@ -400,8 +389,7 @@ def helper():

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
[2, 2] if curr_arch is ti.cpu else [1, 1])
) - count_of_cache_file == get_expected_num_cache_files(2)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -420,9 +408,8 @@ def only_init(max_size):

def run_simple_kernels(max_size):
only_init(max_size)
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
for kernel, args, get_res in simple_kernels_to_test:
assert kernel(*args) == test_utils.approx(get_res(*args))
sleep(1) # make sure the kernels are not used in the same second

kernel_count = len(simple_kernels_to_test)
rem_factor = 1 if policy in [
@@ -431,39 +418,23 @@ def run_simple_kernels(max_size):
count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

run_simple_kernels(1024**3) # 1GB
ti.reset() # Dumping cache data
size_of_cache_files = get_cache_files_size(tmp_offline_cache_file_path())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))

only_init(size_of_cache_files * 2)
ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))

only_init(size_of_cache_files)
ti.reset()
rem = 0
if policy in ['never', 'version']:
rem = sum([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
else:
for i in range(
min(kernel_count - int(factor * kernel_count), kernel_count)):
rem += simple_kernels_to_test[kernel_count - i -
1][3] if curr_arch is ti.cpu else 1
if rem > 0:
rem += 2
assert len(listdir(
tmp_offline_cache_file_path())) - count_of_cache_file == rem
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
int(len(simple_kernels_to_test) * rem_factor))
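Net effect on the test helper, as the hunks above imply: before the revert the expected cache-file count was two metadata files plus one file per offload of each kernel (hence the per-kernel offload counts in the old simple_kernels_to_test table), while after the revert it is two metadata files plus one file per kernel. A side-by-side sketch of the two rules, reconstructed from the diff:

from typing import List, Optional


def expected_cache_files_before_revert(
        num_offloads: Optional[List[int]] = None) -> int:
    # Pre-revert rule (parallel CPU codegen): one cache file per offload.
    if not num_offloads:
        return 0
    # metadata.{json, tcb} plus one file per offloaded task
    return 2 + sum(num_offloads)


def expected_cache_files_after_revert(num_kernels: int) -> int:
    # Post-revert rule: one cache file per kernel.
    if num_kernels == 0:
        return 0
    # metadata.{json, tcb} plus one file per kernel
    return 2 + num_kernels


# Example: a kernel that lowers to 3 offloads (like kernel2 in the old table).
assert expected_cache_files_before_revert([3]) == 5
assert expected_cache_files_after_revert(1) == 3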
