Skip to content

Commit

Permalink
Implement extended dispersion spotfinding (#1)
Browse files Browse the repository at this point in the history
Implement extended dispersion spotfinding

Implement a GPU-based version of the extended dispersion spotfinding
algorithm. This builds on regular dispersion by making two passes.
This allows for the detection of fainter spots by using the first pass
to detect candidate spots and exclude them from the background
calculation in the second pass.

Extended dispersion spotfinding is unavoidably slower than regular
dispersion by the fact that it requires two passes. However, the
performance gained through massively parallel processing on the GPU
should make this a viable option, when needed, even for fast feedback.

Create several CUDA kernels to perform the extended dispersion
spotfinding algorithm (`threshold.cu`, `erosion.cu`).

Refactor the dispersion kernel to share code with extended dispersion.

Move common code to `cuda_common.hpp`.

Create basic test script for extended dispersion spotfinding.

Add an `--algorithm` argument to `spotfinder.cc` along with the
necessary code to parse it, allowing for algorithm selection at runtime.

Add new files to the CMakeLists.txt file to include them in the build.

See also: #12, #13, #14
  • Loading branch information
dimitrivlachos authored Nov 5, 2024
1 parent a2578ef commit db48d74
Show file tree
Hide file tree
Showing 13 changed files with 963 additions and 176 deletions.
3 changes: 3 additions & 0 deletions include/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
#include <type_traits>
#include <vector>

#define VALID_PIXEL 1
#define MASKED_PIXEL 0

template <typename T1, typename... TS>
auto with_formatting(const std::string &code, const T1 &first, TS... args)
-> std::string {
Expand Down
38 changes: 37 additions & 1 deletion include/cuda_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,42 @@ auto make_cuda_pitched_malloc(size_t width, size_t height) {
return std::make_pair(std::shared_ptr<T[]>(obj, deleter), pitch / sizeof(T));
}

/**
* @brief Function to allocate a pitched memory buffer on the GPU.
* @param data The pointer to the allocated memory.
* @param width The width of the buffer.
* @param height The height of the buffer.
* @param pitch The pitch of the buffer.
*/
template <typename T>
struct PitchedMalloc {
public:
using value_type = T;
PitchedMalloc(std::shared_ptr<T[]> data, size_t width, size_t height, size_t pitch)
: _data(data), width(width), height(height), pitch(pitch) {}

PitchedMalloc(size_t width, size_t height) : width(width), height(height) {
auto [alloc, alloc_pitch] = make_cuda_pitched_malloc<T>(width, height);
_data = alloc;
pitch = alloc_pitch;
}

auto get() {
return _data.get();
}
auto size_bytes() -> size_t const {
return pitch * height * sizeof(T);
}
auto pitch_bytes() -> size_t const {
return pitch * sizeof(T);
}

std::shared_ptr<T[]> _data;
size_t width;
size_t height;
size_t pitch;
};

class CudaStream {
cudaStream_t _stream;

Expand Down Expand Up @@ -409,7 +445,7 @@ void save_device_data_to_txt(PixelType *device_ptr,
for (int y = 0, k = 0; y < height; ++y) {
for (int x = 0; x < width; ++x, ++k) {
if (condition_func(host_data[k])) {
out.print("{}, {}\n", x, y);
out.print("{}, {}, {}\n", x, y, host_data[k]);
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions spotfinder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ add_executable(spotfinder
shmread.cc
cbfread.cc
kernels/masking.cu
kernels/thresholding.cu
kernels/erosion.cu
)
target_link_libraries(spotfinder
PRIVATE
Expand Down
114 changes: 114 additions & 0 deletions spotfinder/kernels/erosion.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/**
* @file erosion.cu
* @brief Contains the CUDA kernel implementation for performing
* morphological erosion on an image using a given kernel radius
* and Chebyshev distance threshold.
*
* This kernel processes a dispersion mask containing potential signal
* spots and background pixels. The kernel processes each pixel in the
* mask and iterates over its local neighbourhood defined by a given
* kernel radius. The kernel then checks if each pixel is within a given
* Chebyshev distance threshold of a background pixel. If the pixel is
* within the threshold, it is considered part of the background and is
* marked as a background pixel in the erosion mask. Therefore eroding
* the edges of the signal spots.
*
* @note The __restrict__ keyword is used to indicate to the compiler
* that the two pointers are not aliased, allowing the compiler to
* perform more aggressive optimizations.
*/

#include <cooperative_groups.h>
#include <cooperative_groups/reduce.h>

#include "cuda_common.hpp"
#include "erosion.cuh"

namespace cg = cooperative_groups;

#pragma region Erosion kernel
__global__ void erosion(uint8_t __restrict__ *dispersion_mask,
uint8_t __restrict__ *erosion_mask,
uint8_t __restrict__ *mask,
size_t dispersion_mask_pitch,
size_t erosion_mask_pitch,
size_t mask_pitch,
int width,
int height,
uint8_t radius) {
// Calculate the pixel coordinates
auto block = cg::this_thread_block();
int x = block.group_index().x * block.group_dim().x + block.thread_index().x;
int y = block.group_index().y * block.group_dim().y + block.thread_index().y;

// Guards
if (x >= width || y >= height) return; // Out of bounds guard

bool is_background = dispersion_mask[y * dispersion_mask_pitch + x] == MASKED_PIXEL;
if (is_background) {
/*
* If the pixel is masked, we want to set it to VALID_PIXEL
* in order to invert the mask.
*/
erosion_mask[y * erosion_mask_pitch + x] = VALID_PIXEL;
return;
}

// Calculate the bounds of the erosion kernel
int x_start = max(0, x - radius);
int x_end = min(x + radius + 1, width);
int y_start = max(0, y - radius);
int y_end = min(y + radius + 1, height);

bool should_erase = false; // Flag to determine if the pixel should be erased
constexpr uint8_t chebyshev_distance_threshold = 2;

// Iterate over the kernel bounds
for (int kernel_x = x_start; kernel_x < x_end; ++kernel_x) {
for (int kernel_y = y_start; kernel_y < y_end; ++kernel_y) {
/*
* TODO: Investigate whether we should be doing this or not!
* Intuition says that we should be considering the mask,
* however DIALS does not do this. May be a bug, may be on
* purpose? Investigate!
*/
// if (mask[kernel_y * mask_pitch + kernel_x] == 0) {
// continue;
// }
if (dispersion_mask[kernel_y * dispersion_mask_pitch + kernel_x]
== MASKED_PIXEL) {
// If the current pixel is background, check the Chebyshev distance
uint8_t chebyshev_distance = max(abs(kernel_x - x), abs(kernel_y - y));

if (chebyshev_distance <= chebyshev_distance_threshold) {
// If a background pixel is too close, the current pixel should be erased
should_erase = true;
// We can then break out of the loop, as no further checks are necessary
goto termination;
}
}
}
}

termination:
if (should_erase) {
/*
* Erase the pixel from the background mask. This is done by setting the pixel
* as valid (i.e. not masked) in the erosion_mask data. This allows the pixel to be
* considered as a background pixel in the background calculation as it is not
* considered part of the signal.
*/
erosion_mask[y * erosion_mask_pitch + x] = VALID_PIXEL;
} else {
/*
* If the pixel should not be erased, this means that it is part of the signal.
* and needs to be marked as masked in the erosion_mask data. This prevents the pixel
* from being considered as part of the background in the background calculation.
*/

// Invert 'valid' signal spot to 'masked' background spots
erosion_mask[y * erosion_mask_pitch + x] =
!dispersion_mask[y * dispersion_mask_pitch + x];
}
}
#pragma enregion Erosion kernel
11 changes: 11 additions & 0 deletions spotfinder/kernels/erosion.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

__global__ void erosion(uint8_t __restrict__ *dispersion_mask,
uint8_t __restrict__ *erosion_mask,
uint8_t __restrict__ *mask,
size_t dispersion_mask_pitch,
size_t erosion_mask_pitch,
size_t mask_pitch,
int width,
int height,
uint8_t radius);
15 changes: 9 additions & 6 deletions spotfinder/kernels/masking.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,21 @@
* @param pixel_size_y The pixel size of the detector in the y-direction in m
* @return The calculated distance from the beam center in m
*/
__device__ float get_distance_from_centre(float x,
__device__ float get_distance_from_center(float x,
float y,
float centre_x,
float centre_y,
float center_x,
float center_y,
float pixel_size_x,
float pixel_size_y) {
/*
* Since this calculation is for a broad, general exclusion, we can
* use basic Pythagoras to calculate the distance from the center.
*
* We add 0.5 in order to move to the center of the pixel. Since
* starting at 0, 0, for instance, would infact be the corner.
*/
float dx = (x - centre_x) * pixel_size_x;
float dy = (y - centre_y) * pixel_size_y;
float dx = ((x + 0.5f) - center_x) * pixel_size_x;
float dy = ((y + 0.5f) - center_y) * pixel_size_y;
return sqrtf(dx * dx + dy * dy);
}

Expand Down Expand Up @@ -117,7 +120,7 @@ __global__ void apply_resolution_mask(uint8_t *mask,
return;
}

float distance_from_centre = get_distance_from_centre(
float distance_from_centre = get_distance_from_center(
x, y, beam_center_x, beam_center_y, pixel_size_x, pixel_size_y);
float resolution =
get_resolution(wavelength, distance_to_detector, distance_from_centre);
Expand Down
Loading

0 comments on commit db48d74

Please sign in to comment.