From ad575c80fe5b7c96969e3f78c3a3c3429b6c76c4 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 3 Oct 2022 21:36:48 +0300 Subject: [PATCH] use read/write_events everywhere --- pyopencl/algorithm.py | 14 +++++------ pyopencl/bitonic_sort.py | 2 +- pyopencl/clmath.py | 50 +++++++++++++++++----------------------- pyopencl/clrandom.py | 22 +++++------------- pyopencl/invoker.py | 2 +- pyopencl/reduction.py | 4 ++-- pyopencl/scan.py | 4 ++-- 7 files changed, 40 insertions(+), 58 deletions(-) diff --git a/pyopencl/algorithm.py b/pyopencl/algorithm.py index 0048a2455..ff01652a3 100644 --- a/pyopencl/algorithm.py +++ b/pyopencl/algorithm.py @@ -59,7 +59,7 @@ def _extract_extra_args_types_values(extra_args): if isinstance(val, cl.array.Array): extra_args_types.append(VectorArg(val.dtype, name, with_offset=False)) extra_args_values.append(val) - extra_wait_for.extend(val.events) + extra_wait_for.extend(val.write_events) elif isinstance(val, np.generic): extra_args_types.append(ScalarArg(val.dtype, name)) extra_args_values.append(val) @@ -1161,7 +1161,7 @@ def __call__(self, queue, n_objects, *args, **kwargs): data_args.append(arg_val.base_data) if arg_descr.with_offset: data_args.append(arg_val.offset) - wait_for.extend(arg_val.events) + wait_for.extend(arg_val.write_events) else: data_args.append(arg_val) @@ -1180,7 +1180,7 @@ def __call__(self, queue, n_objects, *args, **kwargs): counts = cl.array.empty(queue, (n_objects + 1), index_dtype, allocator=allocator) counts[-1] = 0 - wait_for = wait_for + counts.events + wait_for = wait_for + counts.write_events # The scan will turn the "counts" array into the "starts" array # in-place. @@ -1233,7 +1233,7 @@ def __call__(self, queue, n_objects, *args, **kwargs): info_record.nonempty_indices, info_record.compressed_indices, info_record.num_nonempty_lists, - wait_for=[count_event] + info_record.compressed_indices.events) + wait_for=[count_event] + info_record.compressed_indices.write_events) info_record.starts = compressed_counts @@ -1262,13 +1262,13 @@ def __call__(self, queue, n_objects, *args, **kwargs): evt = scan_kernel( starts_ary, size=info_record.num_nonempty_lists, - wait_for=starts_ary.events) + wait_for=starts_ary.write_events) else: evt = scan_kernel(starts_ary, wait_for=[count_event], size=n_objects) starts_ary.setitem(0, 0, queue=queue, wait_for=[evt]) - scan_events.extend(starts_ary.events) + scan_events.extend(starts_ary.write_events) # retrieve count info_record.count = int(starts_ary[-1].get()) @@ -1430,7 +1430,7 @@ def __call__(self, queue, keys, values, nkeys, starts = (cl.array.empty(queue, (nkeys+1), starts_dtype, allocator=allocator) .fill(len(values_sorted_by_key), wait_for=[evt])) - evt, = starts.events + evt, = starts.write_events evt = knl_info.start_finder(starts, keys_sorted_by_key, range=slice(len(keys_sorted_by_key)), diff --git a/pyopencl/bitonic_sort.py b/pyopencl/bitonic_sort.py index 29fff563a..ce9a13e8b 100644 --- a/pyopencl/bitonic_sort.py +++ b/pyopencl/bitonic_sort.py @@ -92,7 +92,7 @@ def __call__(self, arr, idx=None, queue=None, wait_for=None, axis=0): if wait_for is None: wait_for = [] - wait_for = wait_for + arr.events + wait_for = wait_for + arr.write_events last_evt = cl.enqueue_marker(queue, wait_for=wait_for) diff --git a/pyopencl/clmath.py b/pyopencl/clmath.py index b5c4fdfcb..ef0f555a9 100644 --- a/pyopencl/clmath.py +++ b/pyopencl/clmath.py @@ -22,14 +22,13 @@ THE SOFTWARE. """ -import pyopencl.array as cl_array import pyopencl.elementwise as elementwise -from pyopencl.array import _get_common_dtype +from pyopencl.array import elwise_kernel_runner, _get_common_dtype import numpy as np def _make_unary_array_func(name): - @cl_array.elwise_kernel_runner + @elwise_kernel_runner def knl_runner(result, arg): if arg.dtype.kind == "c": from pyopencl.elementwise import complex_dtype_to_name @@ -42,8 +41,7 @@ def knl_runner(result, arg): def f(array, queue=None): result = array._new_like_me(queue=queue) - event1 = knl_runner(result, array, queue=queue) - result.add_event(event1) + knl_runner(result, array, queue=queue) return result return f @@ -59,13 +57,13 @@ def f(array, queue=None): asinpi = _make_unary_array_func("asinpi") -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _atan2(result, arg1, arg2): return elementwise.get_float_binary_func_kernel( result.context, "atan2", arg1.dtype, arg2.dtype, result.dtype) -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _atan2pi(result, arg1, arg2): return elementwise.get_float_binary_func_kernel( result.context, "atan2pi", arg1.dtype, arg2.dtype, result.dtype) @@ -80,7 +78,7 @@ def atan2(y, x, queue=None): """ queue = queue or y.queue result = y._new_like_me(_get_common_dtype(y, x, queue)) - result.add_event(_atan2(result, y, x, queue=queue)) + _atan2(result, y, x, queue=queue) return result @@ -94,7 +92,7 @@ def atan2pi(y, x, queue=None): """ queue = queue or y.queue result = y._new_like_me(_get_common_dtype(y, x, queue)) - result.add_event(_atan2pi(result, y, x, queue=queue)) + _atan2pi(result, y, x, queue=queue) return result @@ -121,7 +119,7 @@ def atan2pi(y, x, queue=None): # TODO: fmin -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _fmod(result, arg, mod): return elementwise.get_fmod_kernel(result.context, result.dtype, arg.dtype, mod.dtype) @@ -132,13 +130,13 @@ def fmod(arg, mod, queue=None): for each element in ``arg`` and ``mod``.""" queue = (queue or arg.queue) or mod.queue result = arg._new_like_me(_get_common_dtype(arg, mod, queue)) - result.add_event(_fmod(result, arg, mod, queue=queue)) + _fmod(result, arg, mod, queue=queue) return result # TODO: fract -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _frexp(sig, expt, arg): return elementwise.get_frexp_kernel(sig.context, sig.dtype, expt.dtype, arg.dtype) @@ -150,9 +148,7 @@ def frexp(arg, queue=None): """ sig = arg._new_like_me(queue=queue) expt = arg._new_like_me(queue=queue, dtype=np.int32) - event1 = _frexp(sig, expt, arg, queue=queue) - sig.add_event(event1) - expt.add_event(event1) + _frexp(sig, expt, arg, queue=queue, noutputs=2) return sig, expt # TODO: hypot @@ -161,7 +157,7 @@ def frexp(arg, queue=None): ilogb = _make_unary_array_func("ilogb") -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _ldexp(result, sig, exp): return elementwise.get_ldexp_kernel(result.context, result.dtype, sig.dtype, exp.dtype) @@ -173,7 +169,7 @@ def ldexp(significand, exponent, queue=None): ``result = significand * 2**exponent``. """ result = significand._new_like_me(queue=queue) - result.add_event(_ldexp(result, significand, exponent)) + _ldexp(result, significand, exponent) return result @@ -191,7 +187,7 @@ def ldexp(significand, exponent, queue=None): # TODO: minmag -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _modf(intpart, fracpart, arg): return elementwise.get_modf_kernel(intpart.context, intpart.dtype, fracpart.dtype, arg.dtype) @@ -203,9 +199,7 @@ def modf(arg, queue=None): """ intpart = arg._new_like_me(queue=queue) fracpart = arg._new_like_me(queue=queue) - event1 = _modf(intpart, fracpart, arg, queue=queue) - fracpart.add_event(event1) - intpart.add_event(event1) + _modf(intpart, fracpart, arg, queue=queue, noutputs=2) return fracpart, intpart @@ -238,19 +232,19 @@ def modf(arg, queue=None): # TODO: table 6.10, integer functions # TODO: table 6.12, clamp et al -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _bessel_jn(result, n, x): return elementwise.get_bessel_kernel(result.context, "j", result.dtype, np.dtype(type(n)), x.dtype) -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _bessel_yn(result, n, x): return elementwise.get_bessel_kernel(result.context, "y", result.dtype, np.dtype(type(n)), x.dtype) -@cl_array.elwise_kernel_runner +@elwise_kernel_runner def _hankel_01(h0, h1, x): if h0.dtype != h1.dtype: raise TypeError("types of h0 and h1 must match") @@ -260,20 +254,18 @@ def _hankel_01(h0, h1, x): def bessel_jn(n, x, queue=None): result = x._new_like_me(queue=queue) - result.add_event(_bessel_jn(result, n, x, queue=queue)) + _bessel_jn(result, n, x, queue=queue) return result def bessel_yn(n, x, queue=None): result = x._new_like_me(queue=queue) - result.add_event(_bessel_yn(result, n, x, queue=queue)) + _bessel_yn(result, n, x, queue=queue) return result def hankel_01(x, queue=None): h0 = x._new_like_me(queue=queue) h1 = x._new_like_me(queue=queue) - event1 = _hankel_01(h0, h1, x, queue=queue) - h0.add_event(event1) - h1.add_event(event1) + _hankel_01(h0, h1, x, queue=queue, noutputs=2) return h0, h1 diff --git a/pyopencl/clrandom.py b/pyopencl/clrandom.py index 165be209a..6756ac1ae 100644 --- a/pyopencl/clrandom.py +++ b/pyopencl/clrandom.py @@ -338,9 +338,11 @@ def fill_uniform(self, ary, a=0, b=1, queue=None): evt = knl(queue, (self.num_work_items,), None, self.state.data, ary.data, ary.size*size_multiplier, - b-a, a, wait_for=ary.events) - ary.add_event(evt) - self.state.add_event(evt) + b-a, a, wait_for=ary.write_events + ary.read_events) + + ary.add_write_event(evt) + self.state.add_write_event(evt) + return ary def uniform(self, *args, **kwargs): @@ -369,23 +371,11 @@ def fill_normal(self, ary, mu=0, sigma=1, queue=None): evt = knl(queue, (self.num_work_items,), self.wg_size, self.state.data, ary.data, ary.size*size_multiplier, sigma, mu, -<<<<<<< Updated upstream - wait_for=ary.events) - ary.add_event(evt) - self.state.add_event(evt) -||||||| Stash base - wait_for=ary.write_events) - - ary.add_write_event(evt) - self.state.add_write_event(evt) - -======= wait_for=ary.write_events + ary.read_events) ary.add_write_event(evt) self.state.add_write_event(evt) ->>>>>>> Stashed changes return evt def normal(self, *args, **kwargs): @@ -681,7 +671,7 @@ def _fill(self, distribution, ary, scale, shift, queue=None): gsize, lsize = _splay(queue.device, ary.size) evt = knl(queue, gsize, lsize, *args) - ary.add_event(evt) + ary.add_write_event(evt) self.counter[0] += n * counter_multiplier c1_incr, self.counter[0] = divmod(self.counter[0], self.counter_max) diff --git a/pyopencl/invoker.py b/pyopencl/invoker.py index 865e8dec5..4fa8748a3 100644 --- a/pyopencl/invoker.py +++ b/pyopencl/invoker.py @@ -138,7 +138,7 @@ def add_buf_arg(arg_idx, typechar, expr_str): cl_arg_idx += 1 if in_enqueue: - wait_for_parts .append(f"{arg_var}.events") + wait_for_parts.append(f"{arg_var}.write_events") continue diff --git a/pyopencl/reduction.py b/pyopencl/reduction.py index db8c19afa..1521b1cf7 100644 --- a/pyopencl/reduction.py +++ b/pyopencl/reduction.py @@ -423,7 +423,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> cl.Event: invocation_args.append(arg.base_data) if arg_tp.with_offset: invocation_args.append(arg.offset) - wait_for.extend(arg.events) + wait_for.extend(arg.write_events) else: invocation_args.append(arg) @@ -517,7 +517,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> cl.Event: wait_for=wait_for) wait_for = [last_evt] - result.add_event(last_evt) + result.add_write_event(last_evt) if group_count == 1: if return_event: diff --git a/pyopencl/scan.py b/pyopencl/scan.py index 4bd8a95f0..a0215dfc8 100644 --- a/pyopencl/scan.py +++ b/pyopencl/scan.py @@ -1523,7 +1523,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> cl.Event: data_args.append(arg_val.base_data) if arg_descr.with_offset: data_args.append(arg_val.offset) - wait_for.extend(arg_val.events) + wait_for.extend(arg_val.write_events) else: data_args.append(arg_val) @@ -1750,7 +1750,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> cl.Event: data_args.append(arg_val.base_data) if arg_descr.with_offset: data_args.append(arg_val.offset) - wait_for.extend(arg_val.events) + wait_for.extend(arg_val.write_events) else: data_args.append(arg_val)