Revert "[llvm] (Decomp of #5251 11/n) Enable parallel compilation on …
Browse files Browse the repository at this point in the history
…CPU backend (#5394)"

This reverts commit f6b40de.
feisuzhu committed Jul 12, 2022
1 parent 7f97f9c commit 2827db2
Showing 3 changed files with 60 additions and 111 deletions.
38 changes: 8 additions & 30 deletions taichi/codegen/cpu/codegen_cpu.cpp
@@ -302,39 +302,17 @@ FunctionType CodeGenCPU::codegen() {
     kernel->lower(/*to_executable=*/false);
   }
 
-  auto block = dynamic_cast<Block *>(kernel->ir.get());
-  auto &worker = get_llvm_program(kernel->program)->compilation_workers;
-  TI_ASSERT(block);
-
-  auto &offloads = block->statements;
-  std::vector<LLVMCompiledData> data(offloads.size());
-  using TaskFunc = int32 (*)(void *);
-  std::vector<TaskFunc> task_funcs(offloads.size());
-  for (int i = 0; i < offloads.size(); i++) {
-    auto compile_func = [&, i] {
-      auto offload =
-          irpass::analysis::clone(offloads[i].get(), offloads[i]->get_kernel());
-      irpass::re_id(offload.get());
-      auto new_data = this->modulegen(nullptr, offload->as<OffloadedStmt>());
-      data[i].tasks = std::move(new_data.tasks);
-      data[i].module = std::move(new_data.module);
-    };
-    if (kernel->is_evaluator) {
-      compile_func();
-    } else {
-      worker.enqueue(compile_func);
-    }
-  }
-  if (!kernel->is_evaluator) {
-    worker.flush();
-  }
+  CodeGenLLVMCPU gen(kernel, ir);
+  auto compiled_res = gen.run_compilation();
 
+  CPUModuleToFunctionConverter converter{gen.tlctx,
+                                         llvm_prog->get_runtime_executor()};
+  std::vector<LLVMCompiledData> data_list;
+  data_list.push_back(std::move(compiled_res));
   if (!kernel->is_evaluator) {
-    cache_module(kernel_key, data);
+    cache_module(kernel_key, data_list);
   }
 
-  CPUModuleToFunctionConverter converter(
-      tlctx, get_llvm_program(prog)->get_runtime_executor());
-  return converter.convert(kernel, std::move(data));
+  return converter.convert(this->kernel, std::move(data_list));
 }
 TLANG_NAMESPACE_END
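What this hunk reverts: the parallel path cloned each offloaded task, enqueued one compile job per offload on the program-wide compilation worker, and called flush() to wait for all jobs before caching and converting the per-offload modules; the restored path runs a single CodeGenLLVMCPU compilation for the whole kernel. The snippet below is a minimal standalone Python analogue of that enqueue/flush pattern (plain concurrent.futures, not Taichi's actual compilation_workers API; all names are hypothetical), intended only to illustrate the structure of the removed code.

# Minimal sketch of the enqueue/flush pattern removed by this revert.
# Illustrative only: plain concurrent.futures, not Taichi's
# compilation_workers API; names below are hypothetical.
from concurrent.futures import ThreadPoolExecutor, wait


def compile_offload(offload_id: int) -> str:
    # Stand-in for cloning one offloaded task and generating its module.
    return f"module_for_offload_{offload_id}"


def compile_kernel_parallel(num_offloads: int, num_threads: int = 2):
    # Results are written by index, mirroring the per-offload writes
    # into data[i] in the reverted C++ code.
    data = [None] * num_offloads

    def job(i: int) -> None:
        data[i] = compile_offload(i)

    with ThreadPoolExecutor(max_workers=num_threads) as worker:
        # "enqueue": one compile job per offloaded task.
        futures = [worker.submit(job, i) for i in range(num_offloads)]
        # "flush": block until every queued job has finished.
        wait(futures)
    return data


print(compile_kernel_parallel(4))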
2 changes: 1 addition & 1 deletion taichi/program/compile_config.cpp
@@ -58,7 +58,7 @@ CompileConfig::CompileConfig() {
print_kernel_llvm_ir = false;
print_kernel_nvptx = false;
print_kernel_llvm_ir_optimized = false;
num_compile_threads = 2;
num_compile_threads = 0;

// CUDA backend options:
device_memory_GB = 1; // by default, preallocate 1 GB GPU memory
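The only substantive change in this file restores the default num_compile_threads from 2 back to 0, in line with the removal of the parallel CPU compile path above. If this CompileConfig field is exposed through ti.init() like other config members (an assumption, not verified for this Taichi version), a user could still raise it explicitly:

import taichi as ti

# Hypothetical override of the compile-thread default; assumes
# num_compile_threads is accepted as a ti.init() keyword like other
# CompileConfig fields.
ti.init(arch=ti.cpu, num_compile_threads=2)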
131 changes: 51 additions & 80 deletions tests/python/test_offline_cache.py
@@ -5,8 +5,6 @@
from os import listdir, remove, rmdir, stat
from os.path import join
from tempfile import mkdtemp
from time import sleep
from typing import List

import pytest

@@ -38,11 +36,12 @@ def get_cache_files_size(path):
return result


def get_expected_num_cache_files(num_offloads: List[int] = None) -> int:
if not num_offloads:
def get_expected_num_cache_files(num_kernels: int) -> int:
if num_kernels == 0:
return 0
NUM_CACHE_FILES_PER_KERNEL = 1
# metadata.{json, tcb}
return 2 + sum(num_offloads)
return 2 + NUM_CACHE_FILES_PER_KERNEL * num_kernels


def tmp_offline_cache_file_path():
@@ -101,11 +100,10 @@ def python_kernel3(a, mat):


simple_kernels_to_test = [
(kernel0, (), python_kernel0, 1),
(kernel1, (100, 200, 10.2), python_kernel1, 1),
(kernel2, (1024, ), python_kernel2, 3),
(kernel3, (10, ti.Matrix([[1, 2], [256, 1024]],
ti.i32)), python_kernel3, 1),
(kernel0, (), python_kernel0),
(kernel1, (100, 200, 10.2), python_kernel1),
(kernel2, (1024, ), python_kernel2),
(kernel3, (10, ti.Matrix([[1, 2], [256, 1024]], ti.i32)), python_kernel3),
]


@@ -129,29 +127,28 @@ def wrapped(*args, **kwargs):


@_test_offline_cache_dec
def _test_offline_cache_for_a_kernel(curr_arch, kernel, args, result,
num_offloads):
def _test_offline_cache_for_a_kernel(curr_arch, kernel, args, result):
count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
res1 = kernel(*args)
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path(
))) - count_of_cache_file == get_expected_num_cache_files([num_offloads])
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(1)
res2 = kernel(*args)
assert res1 == test_utils.approx(result) and res1 == test_utils.approx(
res2)

ti.reset()
assert len(listdir(tmp_offline_cache_file_path(
))) - count_of_cache_file == get_expected_num_cache_files([num_offloads])
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(1)


@_test_offline_cache_dec
@@ -163,26 +160,26 @@ def _test_closing_offline_cache_for_a_kernel(curr_arch, kernel, args, result):
offline_cache_file_path=tmp_offline_cache_file_path())
res1 = kernel(*args)
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
offline_cache_file_path=tmp_offline_cache_file_path())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
res2 = kernel(*args)

assert res1 == test_utils.approx(result) and res1 == test_utils.approx(
res2)

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
def test_closing_offline_cache(curr_arch):
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
for kernel, args, get_res in simple_kernels_to_test:
_test_closing_offline_cache_for_a_kernel(curr_arch=curr_arch,
kernel=kernel,
args=args,
@@ -191,13 +188,11 @@ def test_closing_offline_cache(curr_arch):

@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
def test_offline_cache_per_kernel(curr_arch):
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
_test_offline_cache_for_a_kernel(
curr_arch=curr_arch,
kernel=kernel,
args=args,
result=get_res(*args),
num_offloads=num_offloads if curr_arch is ti.cpu else 1)
for kernel, args, get_res in simple_kernels_to_test:
_test_offline_cache_for_a_kernel(curr_arch=curr_arch,
kernel=kernel,
args=args,
result=get_res(*args))


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
@@ -229,18 +224,18 @@ def compute_y():
**current_thread_ext_options())
helper()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([8])
) - count_of_cache_file == get_expected_num_cache_files(8)
helper()

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([8])
) - count_of_cache_file == get_expected_num_cache_files(8)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
@@ -264,7 +259,7 @@ def np_kernel(a, b):
np_mat3 = mat3.to_numpy()

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
@@ -274,7 +269,7 @@ def np_kernel(a, b):
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([1])
) - count_of_cache_file == get_expected_num_cache_files(1)

assert (kernel(mat1, mat1).to_numpy() == np_kernel(np_mat1, np_mat1)).all()
assert (kernel(mat1, mat2).to_numpy() == np_kernel(np_mat1, np_mat2)).all()
@@ -283,7 +278,7 @@ def np_kernel(a, b):

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([1])
) - count_of_cache_file == get_expected_num_cache_files(1)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -306,7 +301,7 @@ def helper():
assert y[None] == test_utils.approx(7.28)

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
@@ -316,12 +311,12 @@ def helper():
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([4])
) - count_of_cache_file == get_expected_num_cache_files(4)
helper()

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([4])
) - count_of_cache_file == get_expected_num_cache_files(4)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -330,31 +325,27 @@ def test_calling_many_kernels(curr_arch):
count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

def helper():
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
for kernel, args, get_res in simple_kernels_to_test:
assert (kernel(*args) == test_utils.approx(get_res(*args)))

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
helper()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

ti.init(arch=curr_arch,
enable_fallback=False,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))
helper()
ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -371,7 +362,7 @@ def helper():
c += i

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)
ti.init(arch=curr_arch,
enable_fallback=False,
default_fp=ti.f32,
@@ -383,14 +374,12 @@ def helper():
default_fp=ti.f64,
**current_thread_ext_options())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
[2] if curr_arch is ti.cpu else [1])
) - count_of_cache_file == get_expected_num_cache_files(1)
helper()

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
[2, 2] if curr_arch is ti.cpu else [1, 1])
) - count_of_cache_file == get_expected_num_cache_files(2)
ti.init(arch=curr_arch,
enable_fallback=False,
default_fp=ti.f32,
@@ -400,8 +389,7 @@ def helper():

ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
[2, 2] if curr_arch is ti.cpu else [1, 1])
) - count_of_cache_file == get_expected_num_cache_files(2)


@pytest.mark.parametrize('curr_arch', supported_archs_offline_cache)
Expand All @@ -420,9 +408,8 @@ def only_init(max_size):

def run_simple_kernels(max_size):
only_init(max_size)
for kernel, args, get_res, num_offloads in simple_kernels_to_test:
for kernel, args, get_res in simple_kernels_to_test:
assert kernel(*args) == test_utils.approx(get_res(*args))
sleep(1) # make sure the kernels are not used in the same second

kernel_count = len(simple_kernels_to_test)
rem_factor = 1 if policy in [
@@ -431,39 +418,23 @@ def run_simple_kernels(max_size):
count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files()
) - count_of_cache_file == get_expected_num_cache_files(0)

run_simple_kernels(1024**3) # 1GB
ti.reset() # Dumping cache data
size_of_cache_files = get_cache_files_size(tmp_offline_cache_file_path())
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))

only_init(size_of_cache_files * 2)
ti.reset()
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
) - count_of_cache_file == get_expected_num_cache_files(
len(simple_kernels_to_test))

only_init(size_of_cache_files)
ti.reset()
rem = 0
if policy in ['never', 'version']:
rem = sum([
kern[3] if curr_arch is ti.cpu else 1
for kern in simple_kernels_to_test
])
else:
for i in range(
min(kernel_count - int(factor * kernel_count), kernel_count)):
rem += simple_kernels_to_test[kernel_count - i -
1][3] if curr_arch is ti.cpu else 1
if rem > 0:
rem += 2
assert len(listdir(
tmp_offline_cache_file_path())) - count_of_cache_file == rem
assert len(listdir(tmp_offline_cache_file_path())
) - count_of_cache_file == get_expected_num_cache_files(
int(len(simple_kernels_to_test) * rem_factor))
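Net effect on the test helper, as the hunks above imply: before the revert the expected cache-file count was two metadata files plus one file per offload of each kernel (hence the per-kernel offload counts in the old simple_kernels_to_test table), while after the revert it is two metadata files plus one file per kernel. A side-by-side sketch of the two rules, reconstructed from the diff:

from typing import List, Optional


def expected_cache_files_before_revert(
        num_offloads: Optional[List[int]] = None) -> int:
    # Pre-revert rule (parallel CPU codegen): one cache file per offload.
    if not num_offloads:
        return 0
    # metadata.{json, tcb} plus one file per offloaded task
    return 2 + sum(num_offloads)


def expected_cache_files_after_revert(num_kernels: int) -> int:
    # Post-revert rule: one cache file per kernel.
    if num_kernels == 0:
        return 0
    # metadata.{json, tcb} plus one file per kernel
    return 2 + num_kernels


# Example: a kernel that lowers to 3 offloads (like kernel2 in the old table).
assert expected_cache_files_before_revert([3]) == 5
assert expected_cache_files_after_revert(1) == 3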
