diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py
index 63b837be9d..21b085c149 100644
--- a/pyccel/ast/cudaext.py
+++ b/pyccel/ast/cudaext.py
@@ -75,11 +75,11 @@ class CudaArray(CudaNewArray):
     arg : list, tuple, PythonList
 
     """
-    __slots__ = ('_arg','_dtype','_precision','_shape','_rank','_order','_memory_location')
+    __slots__ = ('_arg','_dtype','_precision','_shape','_rank','_order','_memory_location', '_current_context')
     _attribute_nodes = ('_arg',)
     name = 'array'
 
-    def __init__(self, arg, dtype=None, order='C', memory_location='managed'):
+    def __init__(self, arg, dtype=None, order='C', current_context='host', memory_location='managed'):
 
         if not isinstance(arg, (PythonTuple, PythonList, Variable)):
            raise TypeError('Unknown type of %s.' % type(arg))
@@ -119,6 +119,10 @@ def __init__(self, arg, dtype=None, order='C', memory_location='managed'):
         #Verify memory location
         if memory_location not in ('host', 'device', 'managed'):
             raise ValueError("memory_location must be 'host', 'device' or 'managed'")
+        # ...
+        if current_context not in ('host', 'device'):
+            raise ValueError("The current context can only be 'host' or 'device'")
+
         self._arg = arg
         self._shape = shape
         self._rank = rank
@@ -126,6 +130,7 @@ def __init__(self, arg, dtype=None, order='C', memory_location='managed'):
         self._order = order
         self._precision = prec
         self._memory_location = memory_location
+        self._current_context = current_context
         super().__init__()

     def __str__(self):
@@ -136,8 +141,16 @@ def arg(self):
         return self._arg

     @property
     def memory_location(self):
+        """ Indicate if the array is in the host or device memory
+        """
         return self._memory_location

+    @property
+    def current_context(self):
+        """ Indicates if the array created is in a host or device context
+        """
+        return self._current_context
+
 class CudaSynchronize(PyccelInternalFunction):
     "Represents a call to Cuda.deviceSynchronize for code generation."
@@ -281,8 +294,6 @@ def __new__(cls, dim=0):
             return expr[0]
         return PythonTuple(*expr)

-
-
 cuda_funcs = {
     'array'       : PyccelFunctionDef('array'       , CudaArray),
     'copy'        : PyccelFunctionDef('copy'        , CudaCopy),
diff --git a/pyccel/ast/cupyext.py b/pyccel/ast/cupyext.py
index 68a5a07391..1ca840cdfb 100644
--- a/pyccel/ast/cupyext.py
+++ b/pyccel/ast/cupyext.py
@@ -83,11 +83,11 @@ class CupyArray(CupyNewArray):
     arg : list, tuple, PythonList
 
     """
-    __slots__ = ('_arg','_dtype','_precision','_shape','_rank','_order')
+    __slots__ = ('_arg', '_dtype','_precision','_shape','_rank','_order', '_memory_location', '_current_context')
     _attribute_nodes = ('_arg',)
     name = 'array'
 
-    def __init__(self, arg, dtype=None, order='C'):
+    def __init__(self, arg, dtype=None, order='C', current_context='host'):
 
         if not isinstance(arg, (PythonTuple, PythonList, Variable)):
            raise TypeError('Unknown type of %s.' % type(arg))
@@ -110,7 +110,8 @@ def __init__(self, arg, dtype=None, order='C'):
         shape = process_shape(False, arg.shape)
         rank  = len(shape)

-
+        if current_context not in ('host', 'device'):
+            raise ValueError("The current context can only be 'host' or 'device'")
         if rank < 2:
             order = None
         else:
@@ -131,6 +132,8 @@ def __init__(self, arg, dtype=None, order='C'):
         self._dtype = dtype
         self._order = order
         self._precision = prec
+        self._current_context = current_context
+        self._memory_location = 'device'
         super().__init__()

     def __str__(self):
@@ -140,6 +143,18 @@ def __str__(self):
     def arg(self):
         return self._arg

+    @property
+    def current_context(self):
+        """ Indicates if the array created is in a host or device context
+        """
+        return self._current_context
+
+    @property
+    def memory_location(self):
+        """ Indicate if the array is in the host or device memory
+        """
+        return self._memory_location
+
 #==============================================================================
 class CupyArange(CupyNewArray):
     """
diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py
index f3553b0ac0..3511c7777e 100644
--- a/pyccel/ast/variable.py
+++ b/pyccel/ast/variable.py
@@ -109,8 +109,8 @@ class base if variable is an object or an object member [Default value: None]
     >>> Variable('int', DottedName('matrix', 'n_rows'))
     matrix.n_rows
     """
-    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const',
-            '_is_target', '_is_optional', '_allows_negative_indexes',
+    __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_current_context',
+            '_is_const', '_is_target', '_is_optional', '_allows_negative_indexes',
             '_cls_base', '_is_argument', '_is_kwonly', '_is_temp','_dtype','_precision',
             '_rank','_shape','_order','_is_private')
     _attribute_nodes = ()
@@ -123,6 +123,7 @@ def __init__(
         rank=0,
         memory_handling='stack',
         memory_location='host',
+        current_context = 'host',
         is_const=False,
         is_target=False,
         is_optional=False,
@@ -162,6 +163,10 @@ def __init__(
             raise ValueError("memory_location must be 'host', 'device' or 'managed'")
         self._memory_location = memory_location

+        if current_context not in ('host', 'device'):
+            raise ValueError("The current context can only be 'host' or 'device'")
+        self._current_context = current_context
+
         if not isinstance(is_const, bool):
             raise TypeError('is_const must be a boolean.')
         self._is_const = is_const
@@ -323,6 +328,12 @@ def memory_location(self):
         """
         return self._memory_location

+    @property
+    def current_context(self):
+        """ Indicates if the variable is currently in the host or device context
+        """
+        return self._current_context
+
     @memory_location.setter
     def memory_location(self, memory_location):
         if memory_location not in ('host', 'device', 'managed'):
diff --git a/pyccel/codegen/printing/ccudacode.py b/pyccel/codegen/printing/ccudacode.py
index 2d6ac5ad43..dd41fe8133 100644
--- a/pyccel/codegen/printing/ccudacode.py
+++ b/pyccel/codegen/printing/ccudacode.py
@@ -521,6 +521,11 @@ def copy_CudaArray_Data(self, expr):
         declare_dtype = self.find_in_dtype_registry(self._print(rhs.dtype), rhs.precision)
         dtype = self.find_in_ndarray_type_registry(self._print(rhs.dtype), rhs.precision)
         arg = rhs.arg if isinstance(rhs, (CudaArray, CupyArray)) else rhs
+
+        memcpy_kind_src = str(rhs.current_context).capitalize()
+        memcpy_kind_dest = 'Host' if rhs.memory_location == 'host' else 'Device'
+        memcpy_kind = f"cudaMemcpy{memcpy_kind_src}To{memcpy_kind_dest}"
+
         if rhs.rank > 1:
             # flattening the args to use them in C initialization.
             arg = self._flatten_list(arg)
@@ -528,12 +533,13 @@ def copy_CudaArray_Data(self, expr):
         self.add_import(c_imports['string'])
         if isinstance(arg, Variable):
             arg = self._print(arg)
-            cpy_data = "cudaMemcpy({0}.raw_data, {1}.{2}, {0}.buffer_size, cudaMemcpyHostToDevice);".format(lhs, arg, dtype)
+            cpy_data = f"cudaMemcpy({lhs}.raw_data, {arg}.{dtype}, {lhs}.buffer_size, {memcpy_kind});"
             return '%s\n' % (cpy_data)
         else :
             arg = ', '.join(self._print(i) for i in arg)
             dummy_array = "%s %s[] = {%s};\n" % (declare_dtype, dummy_array_name, arg)
-            cpy_data = "cudaMemcpy({0}.raw_data, {1}, {0}.buffer_size, cudaMemcpyHostToDevice);".format(self._print(lhs), dummy_array_name, dtype)
+            target_array_name = self._print(lhs)
+            cpy_data = f"cudaMemcpy({target_array_name}.raw_data, {dummy_array_name}, {target_array_name}.buffer_size, {memcpy_kind});"
             return '%s%s\n' % (dummy_array, cpy_data)

     def _print_CudaSynchronize(self, expr):
diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py
index d69a880955..cdbd2e7f5e 100644
--- a/pyccel/parser/semantic.py
+++ b/pyccel/parser/semantic.py
@@ -61,6 +61,7 @@
 from pyccel.ast.core import Assert

 from pyccel.ast.class_defs import NumpyArrayClass, TupleClass, get_cls_base, CudaArrayClass
+from pyccel.ast.cupyext import CupyArray

 from pyccel.ast.datatypes import NativeRange, str_dtype
 from pyccel.ast.datatypes import NativeSymbol
@@ -96,7 +97,8 @@
 from pyccel.ast.numpyext import DtypePrecisionToCastFunction

 from pyccel.ast.cupyext import CupyNewArray
-from pyccel.ast.cudaext import CudaNewArray, CudaThreadIdx, CudaBlockDim, CudaBlockIdx, CudaGridDim
+from pyccel.ast.cudaext import CudaArray, CudaNewArray, CudaThreadIdx, CudaBlockDim, CudaBlockIdx, CudaGridDim
+
 from pyccel.ast.omp import (OMP_For_Loop, OMP_Simd_Construct, OMP_Distribute_Construct,
                             OMP_TaskLoop_Construct, OMP_Sections_Construct, Omp_End_Clause,
@@ -436,6 +438,7 @@ def _infere_type(self, expr, **settings):
             d_var['datatype'       ] = expr.dtype
             d_var['memory_handling'] = expr.memory_handling
             d_var['memory_location'] = expr.memory_location
+            d_var['current_context'] = expr.current_context
             d_var['shape'          ] = expr.shape
             d_var['rank'           ] = expr.rank
             d_var['cls_base'       ] = expr.cls_base
@@ -493,6 +496,18 @@ def _infere_type(self, expr, **settings):
             d_var['cls_base'       ] = NumpyArrayClass
             return d_var

+        elif isinstance(expr, CupyArray):
+            d_var['datatype'       ] = expr.dtype
+            d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
+            d_var['memory_location'] = expr.memory_location
+            d_var['current_context'] = expr.current_context
+            d_var['shape'          ] = expr.shape
+            d_var['rank'           ] = expr.rank
+            d_var['order'          ] = expr.order
+            d_var['precision'      ] = expr.precision
+            d_var['cls_base'       ] = CudaArrayClass
+            return d_var
+
         elif isinstance(expr, CupyNewArray):
             d_var['datatype'       ] = expr.dtype
             d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
@@ -504,6 +519,18 @@ def _infere_type(self, expr, **settings):
             d_var['cls_base'       ] = CudaArrayClass
             return d_var

+        elif isinstance(expr, CudaArray):
+            d_var['datatype'       ] = expr.dtype
+            d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
+            d_var['memory_location'] = expr.memory_location
+            d_var['current_context'] = expr.current_context
+            d_var['shape'          ] = expr.shape
+            d_var['rank'           ] = expr.rank
+            d_var['order'          ] = expr.order
+            d_var['precision'      ] = expr.precision
+            d_var['cls_base'       ] = CudaArrayClass
+            return d_var
+
         elif isinstance(expr, CudaNewArray):
             d_var['datatype'       ] = expr.dtype
             d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
@@ -731,7 +758,8 @@ def _handle_function_args(self, arguments, **settings):
             if isinstance(a.value, StarredArguments):
                 args.extend([FunctionCallArgument(av) for av in a.value.args_var])
             else:
-                if isinstance(a.value, PyccelArithmeticOperator) and a.value.rank:
+                if (isinstance(a.value, PyccelArithmeticOperator) and a.value.rank)\
+                    or isinstance(a.value, (NumpyNewArray, CudaNewArray)):
                     tmp_var = PyccelSymbol(self.scope.get_new_name(), is_temp=True)
                     assign = self._visit(Assign(tmp_var, arg.value, fst= arg.value.fst))
                     self._additional_exprs[-1].append(assign)
@@ -837,6 +865,18 @@ def _handle_function(self, expr, func, args, **settings):
                     severity = 'fatal')

         args, kwargs = split_positional_keyword_arguments(*args)
+        if func in (CupyArray, CudaArray, NumpyArray):
+            if 'device' in self.scope.decorators or 'kernel' in self.scope.decorators:
+                current_context = 'device'
+                if kwargs.get('memory_location', 'host') == 'host':
+                    errors.report("Host arrays cannot be allocated on the Device",
+                        symbol = expr,
+                        severity = 'fatal')
+            else:
+                current_context = 'host'
+            if func in (CudaArray, CupyArray):
+                kwargs['current_context'] = current_context
+
         for a in args:
             if getattr(a,'dtype',None) == 'tuple':
                 self._infere_type(a, **settings)
diff --git a/tests/internal/scripts/ccuda/cuda_array_variable.py b/tests/internal/scripts/ccuda/cuda_array_variable.py
new file mode 100644
index 0000000000..db34e809f4
--- /dev/null
+++ b/tests/internal/scripts/ccuda/cuda_array_variable.py
@@ -0,0 +1,19 @@
+# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring/
+
+from pyccel.decorators import kernel, types
+from pyccel import cuda
+
+@kernel
+@types('int[:]')
+def square(a):
+    index = cuda.blockIdx(0) * cuda.blockDim(0) + cuda.threadIdx(0)
+    a[index] = a[index] * a[index]
+
+if __name__ == '__main__':
+    threads_per_block = 5
+    n_blocks = 1
+    arr_var = (0,1,2,3,4)
+    arr = cuda.array(arr_var)
+    cuda.synchronize()
+    square[n_blocks, threads_per_block](arr)
+    cuda.synchronize()
diff --git a/tests/internal/scripts/ccuda/cuda_copy.py b/tests/internal/scripts/ccuda/cuda_copy.py
index 11a59c7fa0..144287d53e 100644
--- a/tests/internal/scripts/ccuda/cuda_copy.py
+++ b/tests/internal/scripts/ccuda/cuda_copy.py
@@ -1,3 +1,5 @@
+# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring/
+
 from pyccel.decorators import kernel, types
 from pyccel import cuda

diff --git a/tests/internal/scripts/ccuda/cuda_grid.py b/tests/internal/scripts/ccuda/cuda_grid.py
index 56c2d14325..2e8a3a4724 100644
--- a/tests/internal/scripts/ccuda/cuda_grid.py
+++ b/tests/internal/scripts/ccuda/cuda_grid.py
@@ -1,3 +1,5 @@
+# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring
+
 from pyccel.decorators import kernel, types
 from pyccel import cuda

diff --git a/tests/internal/scripts/ccuda/kernel.py b/tests/internal/scripts/ccuda/kernel.py
index 86d77418c6..914a776608 100644
--- a/tests/internal/scripts/ccuda/kernel.py
+++ b/tests/internal/scripts/ccuda/kernel.py
@@ -1,3 +1,5 @@
+# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring
+
 from pyccel.decorators import kernel, types
 from pyccel import cuda

diff --git a/tests/internal/scripts/ccuda/kernel_launch.py b/tests/internal/scripts/ccuda/kernel_launch.py
new file mode 100644
index 0000000000..d1ddd03427
--- /dev/null
+++ b/tests/internal/scripts/ccuda/kernel_launch.py
@@ -0,0 +1,18 @@
+# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring/
+
+from pyccel.decorators import kernel, types
+from pyccel import cuda
+
+@kernel
+@types('int[:]')
+def func(a):
+    i = cuda.threadIdx(0) + cuda.blockIdx(0) * cuda.blockDim(0)
+    print("Hello World! ", a[i])
+
+if __name__ == '__main__':
+    threads_per_block = 5
+    n_blocks = 1
+    arr = cuda.array([0, 1, 2, 3, 4])
+    cuda.synchronize()
+    func[n_blocks, threads_per_block](arr)
+    cuda.synchronize()
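
Note: the host/device context rule added to _handle_function in semantic.py can be summarised outside of pyccel as the standalone sketch below. It is illustrative only and not part of the patch: the plain `decorators` set stands in for `self.scope.decorators`, and the `ContextError` exception replaces the fatal `errors.report` call.

# Minimal sketch of the context rule introduced in semantic.py (illustrative names).
class ContextError(Exception):
    """Raised when a host array is requested inside a kernel/device function."""

def resolve_array_context(decorators, memory_location='host'):
    """Return the context in which an array-creation call is evaluated.

    Inside a function decorated with @kernel or @device the context is
    'device', and asking for a host array there is rejected; everywhere
    else the context is 'host'.
    """
    if 'device' in decorators or 'kernel' in decorators:
        if memory_location == 'host':
            raise ContextError("Host arrays cannot be allocated on the Device")
        return 'device'
    return 'host'

if __name__ == '__main__':
    print(resolve_array_context(set()))                                # host
    print(resolve_array_context({'kernel'}, memory_location='device')) # device
    try:
        resolve_array_context({'kernel'}, memory_location='host')
    except ContextError as err:
        print(err)                                                     # rejected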