Skip to content

Commit

Permalink
Force loop unrolling to optimise register usage
Browse files: browse the repository at this point in the history
  • Loading branch information
dimitrivlachos committed Dec 13, 2024
1 parent 0b028f0 commit aed24a1
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions spotfinder/kernels/thresholding.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,7 +74,9 @@ __device__ cuda::std::tuple<bool, bool, uint8_t> calculate_dispersion_flags(
size_t sumsq = 0;
uint8_t n = 0;

#pragma unroll
for (int i = -kernel_radius; i <= kernel_radius; ++i) {
#pragma unroll
for (int j = -kernel_radius; j <= kernel_radius; ++j) {
// Calculate the local coordinates
int lx = local_x + j;
Expand Down Expand Up @@ -421,7 +423,9 @@ __global__ void dispersion_extended_second_pass(
uint sum = 0;
uint8_t n = 0;

#pragma unroll
for (int i = -KERNEL_RADIUS_EXTENDED; i <= KERNEL_RADIUS_EXTENDED; ++i) {
#pragma unroll
for (int j = -KERNEL_RADIUS_EXTENDED; j <= KERNEL_RADIUS_EXTENDED; ++j) {
// Calculate the local coordinates
int lx = local_x + j;
Expand Down

0 comments on commit aed24a1

Please sign in to comment.