From 3cf6bde9e4ceacdb38483042a01e421e336e1ef4 Mon Sep 17 00:00:00 2001
From: nychiang
Date: Mon, 13 Mar 2023 16:00:58 -0700
Subject: [PATCH] cuda reduce kernel

---
 src/LinAlg/VectorCudaKernels.cu | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu
index 872a36667..4d6caea51 100644
--- a/src/LinAlg/VectorCudaKernels.cu
+++ b/src/LinAlg/VectorCudaKernels.cu
@@ -393,9 +393,9 @@ __global__ void add_linear_damping_term_cu(int n, double* data, const double* ix
 }
 
 /** @brief y[i] = 1.0 if x[i] is positive and id[i] = 1.0, otherwise y[i] = 0 */
-__global__ void is_posive_w_pattern_cu(int n, double* data, const double* vd, const double* id)
+__global__ void is_posive_w_pattern_cu(int n, int* data, const double* vd, const double* id)
 {
-  extern __shared__ float shared_sum[];
+  extern __shared__ int shared_sum[];
   const int num_threads = blockDim.x * gridDim.x;
   const int tid = blockIdx.x * blockDim.x + threadIdx.x;
   int sum = 0;
@@ -957,7 +957,7 @@ int is_posive_w_pattern_kernel(int n_local,
   cudaMemcpy(h_retval, d_retval, num_blocks*sizeof(int), cudaMemcpyDeviceToHost);
 
   int sum_result = 0;
-  for(int i=0;i
[...]
 // thrust::device_vector<double> v_temp(n);
 // double* dv_ptr = thrust::raw_pointer_cast(v_temp.data());
+// is_posive_w_pattern_kernel(n, dv_ptr, d1, id);
 // return thrust::reduce(thrust::device, v_temp.begin(), v_temp.end(), (int)0, thrust::plus<int>());
-  int irev = hiop::cuda::is_posive_w_pattern_kernel(n, dv_ptr, d1, id);
+  int irev = hiop::cuda::is_posive_w_pattern_kernel(n, d1, id);
   return irev;
 }
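
Note for reviewers: only the lines in the hunks above come from the patch. The sketch below is a minimal CUDA illustration of the block-level reduce pattern the patch adjusts (per-thread counting into a register, shared-memory tree reduction, one partial sum per block, host-side final sum). The kernel body after "int sum = 0;", the wrapper's parameter types, block_size = 256, and the d_retval/h_retval allocation are assumptions made for the sketch, not code taken from the hiop sources.

#include <cuda_runtime.h>

static const int block_size = 256;  // assumed launch configuration (power of two)

// Each thread counts entries with vd[i] > 0 and id[i] == 1 over a grid-stride
// loop, then a shared-memory tree reduction leaves one partial count per block
// in data[blockIdx.x]. Matches the int* data / int shared_sum[] types above.
__global__ void is_posive_w_pattern_cu(int n, int* data, const double* vd, const double* id)
{
  extern __shared__ int shared_sum[];
  const int num_threads = blockDim.x * gridDim.x;
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int sum = 0;
  for(int i = tid; i < n; i += num_threads) {
    if(id[i] == 1.0 && vd[i] > 0.0) {
      sum++;
    }
  }
  shared_sum[threadIdx.x] = sum;
  __syncthreads();
  // standard tree reduction within the block (blockDim.x is a power of two)
  for(int s = blockDim.x / 2; s > 0; s >>= 1) {
    if(threadIdx.x < s) {
      shared_sum[threadIdx.x] += shared_sum[threadIdx.x + s];
    }
    __syncthreads();
  }
  if(threadIdx.x == 0) {
    data[blockIdx.x] = shared_sum[0];
  }
}

// Host wrapper mirroring what the second hunk suggests: launch with dynamic
// shared memory, copy the per-block partial sums back, finish the sum on the CPU.
int is_posive_w_pattern_kernel(int n_local, const double* d1, const double* id)
{
  int num_blocks = (n_local + block_size - 1) / block_size;

  int* d_retval = nullptr;
  cudaMalloc((void**)&d_retval, num_blocks * sizeof(int));

  is_posive_w_pattern_cu<<<num_blocks, block_size, block_size * sizeof(int)>>>(n_local, d_retval, d1, id);

  int* h_retval = new int[num_blocks];
  cudaMemcpy(h_retval, d_retval, num_blocks * sizeof(int), cudaMemcpyDeviceToHost);

  int sum_result = 0;
  for(int i = 0; i < num_blocks; i++) {
    sum_result += h_retval[i];
  }

  delete[] h_retval;
  cudaFree(d_retval);
  return sum_result;
}

Compared with the commented-out thrust path visible in the last hunk (fill an n-length device_vector, then thrust::reduce over it), this in-kernel reduction avoids materializing the temporary vector and the extra reduction pass; the caller now passes only d1 and id, which is why the dv_ptr argument is dropped from the call.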