From 65b5311fa5e9f884bf84a456ad5bf87361d349da Mon Sep 17 00:00:00 2001
From: Zhanlue Yang
Date: Thu, 20 Oct 2022 11:26:42 +0800
Subject: [PATCH] [misc] Rc v1.2.0 cherry-pick PR number 2 (#6384)

Issue: #

### Brief Summary

Co-authored-by: Mingrui Zhang <33411325+erizmr@users.noreply.github.com>
Co-authored-by: Ailing
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 docs/lang/articles/debug/debugging.md         |   8 +
 python/taichi/ad/_ad.py                       |   6 +-
 python/taichi/lang/kernel_impl.py             |   8 +-
 taichi/program/kernel.cpp                     |   5 +-
 tests/python/test_ad_gdar_diffmpm.py          | 185 ++++++++++++++++++
 ...test_ad_global_data_access_rule_checker.py |  32 +++
 6 files changed, 240 insertions(+), 4 deletions(-)
 create mode 100644 tests/python/test_ad_gdar_diffmpm.py

diff --git a/docs/lang/articles/debug/debugging.md b/docs/lang/articles/debug/debugging.md
index bb2c55ba7b71b..9616d8da17ce7 100644
--- a/docs/lang/articles/debug/debugging.md
+++ b/docs/lang/articles/debug/debugging.md
@@ -68,6 +68,14 @@ def inside_taichi_scope():
 
 `print` in the Taichi scope is supported on the CPU, CUDA, and Vulkan backends only.
 
+:::note
+To enable printing on the Vulkan backend:
+- Make sure the validation layer is installed via the [Vulkan SDK](https://vulkan.lunarg.com/sdk/home).
+- Turn on debug mode with `ti.init(debug=True)`.
+
+Printing is not supported on the macOS Vulkan backend.
+:::
+
 :::note
 `print` does not work in Graphical Python Shells, such as IDLE and Jupyter Notebook. This is because these backends print outputs to the console, not to the GUI.
 :::

diff --git a/python/taichi/ad/_ad.py b/python/taichi/ad/_ad.py
index f384dc1fe93fb..3e99da1681d3c 100644
--- a/python/taichi/ad/_ad.py
+++ b/python/taichi/ad/_ad.py
@@ -216,7 +216,11 @@ def __exit__(self, _type, value, tb):
             calls[0].autodiff_mode = mode
 
     def insert(self, func, args):
-        assert func.autodiff_mode == AutodiffMode.NONE, "Inserted funcs should be forward kernels."
+        # Kernels in `AutodiffMode.NONE` and `AutodiffMode.VALIDATION` mode are both forward kernels.
+        # The difference is that VALIDATION kernels contain `assert` statements for the global data access rule check.
+        assert func.autodiff_mode in (
+            AutodiffMode.NONE, AutodiffMode.VALIDATION
+        ), "Inserted funcs should be forward kernels."
         self.modes.append(func.autodiff_mode)
         if self.validation:
             func.autodiff_mode = AutodiffMode.VALIDATION

diff --git a/python/taichi/lang/kernel_impl.py b/python/taichi/lang/kernel_impl.py
index cb479b38bf174..3a705d5d119e0 100644
--- a/python/taichi/lang/kernel_impl.py
+++ b/python/taichi/lang/kernel_impl.py
@@ -522,6 +522,8 @@ def materialize(self, key=None, args=None, arg_features=None):
             grad_suffix = "_forward_grad"
         elif self.autodiff_mode == AutodiffMode.REVERSE:
             grad_suffix = "_reverse_grad"
+        elif self.autodiff_mode == AutodiffMode.VALIDATION:
+            grad_suffix = "_validate_grad"
         kernel_name = f"{self.func.__name__}_c{self.kernel_counter}_{key[1]}{grad_suffix}"
         _logging.trace(f"Compiling kernel {kernel_name}...")
 
@@ -850,7 +852,11 @@ def __call__(self, *args, **kwargs):
         # Both the class kernels and the plain-function kernels are unified now.
         # In both cases, |self.grad| is another Kernel instance that computes the
         # gradient. For class kernels, args[0] is always the kernel owner.
-        if self.autodiff_mode == AutodiffMode.NONE and self.runtime.target_tape and not self.runtime.grad_replaced:
+
+        # No need to capture grad kernels because they are already bound to their primal kernels.
+        if self.autodiff_mode in (
+                AutodiffMode.NONE, AutodiffMode.VALIDATION
+        ) and self.runtime.target_tape and not self.runtime.grad_replaced:
             self.runtime.target_tape.insert(self, args)
 
         if self.autodiff_mode != AutodiffMode.NONE and impl.current_cfg(

diff --git a/taichi/program/kernel.cpp b/taichi/program/kernel.cpp
index fc54d84204de2..a5efe6a096117 100644
--- a/taichi/program/kernel.cpp
+++ b/taichi/program/kernel.cpp
@@ -420,9 +420,10 @@ void Kernel::init(Program &program,
 
   this->arch = program.this_thread_config().arch;
 
-  if (autodiff_mode == AutodiffMode::kNone ||
-      autodiff_mode == AutodiffMode::kCheckAutodiffValid) {
+  if (autodiff_mode == AutodiffMode::kNone) {
     name = primal_name;
+  } else if (autodiff_mode == AutodiffMode::kCheckAutodiffValid) {
+    name = primal_name + "_validate_grad";
   } else if (autodiff_mode == AutodiffMode::kForward) {
     name = primal_name + "_forward_grad";
   } else if (autodiff_mode == AutodiffMode::kReverse) {

diff --git a/tests/python/test_ad_gdar_diffmpm.py b/tests/python/test_ad_gdar_diffmpm.py
new file mode 100644
index 0000000000000..349a16b3c3744
--- /dev/null
+++ b/tests/python/test_ad_gdar_diffmpm.py
@@ -0,0 +1,185 @@
+import pytest
+
+import taichi as ti
+from tests import test_utils
+
+
+@test_utils.test(require=ti.extension.assertion, debug=True, exclude=[ti.cc])
+def test_gdar_mpm():
+    real = ti.f32
+
+    dim = 2
+    N = 30  # kept small to avoid running out of GPU memory
+    n_particles = N * N
+    n_grid = 120
+    dx = 1 / n_grid
+    inv_dx = 1 / dx
+    dt = 3e-4
+    p_mass = 1
+    p_vol = 1
+    E = 100
+    mu = E
+    la = E
+    max_steps = 32
+    steps = 32
+    gravity = 9.8
+    target = [0.3, 0.6]
+
+    scalar = lambda: ti.field(dtype=real)
+    vec = lambda: ti.Vector.field(dim, dtype=real)
+    mat = lambda: ti.Matrix.field(dim, dim, dtype=real)
+
+    x = ti.Vector.field(dim,
+                        dtype=real,
+                        shape=(max_steps, n_particles),
+                        needs_grad=True)
+    x_avg = ti.Vector.field(dim, dtype=real, shape=(), needs_grad=True)
+    v = ti.Vector.field(dim,
+                        dtype=real,
+                        shape=(max_steps, n_particles),
+                        needs_grad=True)
+    grid_v_in = ti.Vector.field(dim,
+                                dtype=real,
+                                shape=(max_steps, n_grid, n_grid),
+                                needs_grad=True)
+    grid_v_out = ti.Vector.field(dim,
+                                 dtype=real,
+                                 shape=(max_steps, n_grid, n_grid),
+                                 needs_grad=True)
+    grid_m_in = ti.field(dtype=real,
+                         shape=(max_steps, n_grid, n_grid),
+                         needs_grad=True)
+    C = ti.Matrix.field(dim,
+                        dim,
+                        dtype=real,
+                        shape=(max_steps, n_particles),
+                        needs_grad=True)
+    F = ti.Matrix.field(dim,
+                        dim,
+                        dtype=real,
+                        shape=(max_steps, n_particles),
+                        needs_grad=True)
+    init_v = ti.Vector.field(dim, dtype=real, shape=(), needs_grad=True)
+    loss = ti.field(dtype=real, shape=(), needs_grad=True)
+
+    @ti.kernel
+    def set_v():
+        for i in range(n_particles):
+            v[0, i] = init_v[None]
+
+    @ti.kernel
+    def p2g(f: ti.i32):
+        for p in range(n_particles):
+            base = ti.cast(x[f, p] * inv_dx - 0.5, ti.i32)
+            fx = x[f, p] * inv_dx - ti.cast(base, ti.i32)
+            w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
+            new_F = (ti.Matrix.diag(dim=2, val=1) + dt * C[f, p]) @ F[f, p]
+            F[f + 1, p] = new_F
+            J = (new_F).determinant()
+            r, s = ti.polar_decompose(new_F)
+            cauchy = 2 * mu * (new_F - r) @ new_F.transpose() + \
+                     ti.Matrix.diag(2, la * (J - 1) * J)
+            stress = -(dt * p_vol * 4 * inv_dx * inv_dx) * cauchy
+            affine = stress + p_mass * C[f, p]
+            for i in ti.static(range(3)):
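+                # Scatter this particle's mass and momentum to its 3x3 grid
+                # neighborhood, weighted by the quadratic B-spline weights in `w`.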
+                for j in ti.static(range(3)):
+                    offset = ti.Vector([i, j])
+                    dpos = (ti.cast(ti.Vector([i, j]), real) - fx) * dx
+                    weight = w[i](0) * w[j](1)
+                    grid_v_in[f, base + offset] += weight * (p_mass * v[f, p] +
+                                                             affine @ dpos)
+                    grid_m_in[f, base + offset] += weight * p_mass
+
+    bound = 3
+
+    @ti.kernel
+    def grid_op(f: ti.i32):
+        for i, j in ti.ndrange(n_grid, n_grid):
+            inv_m = 1 / (grid_m_in[f, i, j] + 1e-10)
+            v_out = inv_m * grid_v_in[f, i, j]
+            v_out[1] -= dt * gravity
+            if i < bound and v_out[0] < 0:
+                v_out[0] = 0
+            if i > n_grid - bound and v_out[0] > 0:
+                v_out[0] = 0
+            if j < bound and v_out[1] < 0:
+                v_out[1] = 0
+            if j > n_grid - bound and v_out[1] > 0:
+                v_out[1] = 0
+            grid_v_out[f, i, j] = v_out
+
+    @ti.kernel
+    def g2p(f: ti.i32):
+        for p in range(n_particles):
+            base = ti.cast(x[f, p] * inv_dx - 0.5, ti.i32)
+            fx = x[f, p] * inv_dx - ti.cast(base, real)
+            w = [
+                0.5 * (1.5 - fx)**2, 0.75 - (fx - 1.0)**2, 0.5 * (fx - 0.5)**2
+            ]
+            new_v = ti.Vector([0.0, 0.0])
+            new_C = ti.Matrix([[0.0, 0.0], [0.0, 0.0]])
+
+            for i in ti.static(range(3)):
+                for j in ti.static(range(3)):
+                    dpos = ti.cast(ti.Vector([i, j]), real) - fx
+                    g_v = grid_v_out[f, base(0) + i, base(1) + j]
+                    weight = w[i](0) * w[j](1)
+                    new_v += weight * g_v
+                    new_C += 4 * weight * g_v.outer_product(dpos) * inv_dx
+
+            # This write violates the global data access rule (v[f, p] has already
+            # been read by p2g) and should be caught by the checker.
+            v[f, p] = new_v
+            x[f + 1, p] = x[f, p] + dt * v[f, p]
+            C[f + 1, p] = new_C
+
+    @ti.kernel
+    def compute_x_avg():
+        for i in range(n_particles):
+            x_avg[None] += (1 / n_particles) * x[steps - 1, i]
+
+    @ti.kernel
+    def compute_loss():
+        dist = (x_avg[None] - ti.Vector(target))**2
+        loss[None] = 0.5 * (dist(0) + dist(1))
+
+    def substep(s):
+        p2g(s)
+        grid_op(s)
+        g2p(s)
+
+    # initialization
+    init_v[None] = [0, 0]
+
+    for i in range(n_particles):
+        F[0, i] = [[1, 0], [0, 1]]
+
+    for i in range(N):
+        for j in range(N):
+            x[0, i * N + j] = [dx * (i * 0.7 + 10), dx * (j * 0.7 + 25)]
+
+    set_v()
+
+    losses = []
+
+    for i in range(2):
+        grid_v_in.fill(0)
+        grid_m_in.fill(0)
+
+        x_avg[None] = [0, 0]
+
+        with pytest.raises(ti.TaichiAssertionError):
+            with ti.ad.Tape(loss=loss, validation=True):
+                set_v()
+                for s in range(steps - 1):
+                    substep(s)
+
+                compute_x_avg()
+                compute_loss()
+
+        l = loss[None]
+        losses.append(l)
+        grad = init_v.grad[None]
+        print('loss=', l, ' grad=', (grad[0], grad[1]))
+        learning_rate = 10
+        init_v[None][0] -= learning_rate * grad[0]
+        init_v[None][1] -= learning_rate * grad[1]

diff --git a/tests/python/test_ad_global_data_access_rule_checker.py b/tests/python/test_ad_global_data_access_rule_checker.py
index db85fda79165a..70c0c630cea8a 100644
--- a/tests/python/test_ad_global_data_access_rule_checker.py
+++ b/tests/python/test_ad_global_data_access_rule_checker.py
@@ -161,3 +161,35 @@ def kernel_2():
     func_calls = t.calls
     for f, _ in func_calls:
         assert f.autodiff_mode == AutodiffMode.NONE
+
+
+@test_utils.test(require=ti.extension.assertion, exclude=[ti.cc], debug=True)
+def test_validation_kernel_capture():
+    N = 16
+    T = 8
+    x = ti.field(dtype=ti.f32, shape=N, needs_grad=True)
+    loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+    b = ti.field(dtype=ti.f32, shape=(), needs_grad=True)
+
+    @ti.kernel
+    def kernel_1():
+        loss[None] = x[1] * b[None]
+
+    @ti.kernel
+    def kernel_2():
+        loss[None] = x[1] * b[None]
+
+    def forward(T):
+        for t in range(T):
+            kernel_1()
+            kernel_2()
+
+    for i in range(N):
+        x[i] = i
+
+    b[None] = 10
+    loss.grad[None] = 1
+
+    with ti.ad.Tape(loss=loss, validation=True) as t:
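+        # With validation=True, the kernels recorded on the tape are compiled as
+        # VALIDATION kernels (forward kernels that carry the global data access
+        # rule asserts), and the tape should still capture every launch below.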
+        forward(T)
+    assert len(t.calls) == 2 * T and len(t.modes) == 2 * T
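
For readers of this patch, below is a minimal, self-contained sketch (not part of the diff) of the workflow the new tests exercise: running a forward pass under `ti.ad.Tape(loss=..., validation=True)` so that the recorded kernels become VALIDATION kernels and a violation of the global data access rule raises `ti.TaichiAssertionError`. The kernel names, field sizes, and the choice of the CPU backend are illustrative assumptions, not taken from the patch; it assumes a backend with the assertion extension and `debug=True`, as the tests above require.

```python
import pytest
import taichi as ti

# Assumption: CPU backend with the assertion extension; debug mode is required
# for the inserted asserts to fire, mirroring @test_utils.test(..., debug=True).
ti.init(arch=ti.cpu, debug=True)

n = 8
x = ti.field(dtype=ti.f32, shape=n, needs_grad=True)
loss = ti.field(dtype=ti.f32, shape=(), needs_grad=True)


@ti.kernel
def compute_loss():
    for i in range(n):
        loss[None] += x[i] ** 2  # reads x[i]


@ti.kernel
def overwrite_x():
    for i in range(n):
        x[i] = 0.0  # overwrites values that compute_loss() already read


for i in range(n):
    x[i] = i

# validation=True makes the tape record VALIDATION kernels, which carry the
# global data access rule asserts; the non-atomic overwrite after the read
# should trip one of them, just like g2p() does in test_gdar_mpm above.
with pytest.raises(ti.TaichiAssertionError):
    with ti.ad.Tape(loss=loss, validation=True):
        compute_loss()
        overwrite_x()
```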