Implement extended dispersion spotfinding (#1)

Implement extended dispersion spotfinding Implement a GPU-based version of the extended dispersion spotfinding algorithm. This builds on regular dispersion by making two passes. This allows for the detection of fainter spots by using the first pass to detect candidate spots and exclude them from the background calculation in the second pass. Extended dispersion spotfinding is unavoidably slower than regular dispersion by the fact that it requires two passes. However, the performance gained through massively parallel processing on the GPU should make this a viable option, when needed, even for fast feedback. Create several CUDA kernels to perform the extended dispersion spotfinding algorithm (`threshold.cu`, `erosion.cu`). Refactor the dispersion kernel to share code with extended dispersion. Move common code to `cuda_common.hpp`. Create basic test script for extended dispersion spotfinding. Add an `--algorithm` argument to `spotfinder.cc` along with the necessary code to parse it, allowing for algorithm selection at runtime. Add new files to the CMakeLists.txt file to include them in the build. See also: #12, #13, #14
DiamondLightSource · Nov 5, 2024 · db48d74 · db48d74
1 parent a2578ef
commit db48d74
Show file tree

Hide file tree

Showing 13 changed files with 963 additions and 176 deletions.
diff --git a/include/common.hpp b/include/common.hpp
@@ -12,6 +12,9 @@
 #include <type_traits>
 #include <vector>
 
+#define VALID_PIXEL 1
+#define MASKED_PIXEL 0
+
 template <typename T1, typename... TS>
 auto with_formatting(const std::string &code, const T1 &first, TS... args)
   -> std::string {

diff --git a/include/cuda_common.hpp b/include/cuda_common.hpp
@@ -266,6 +266,42 @@ auto make_cuda_pitched_malloc(size_t width, size_t height) {
     return std::make_pair(std::shared_ptr<T[]>(obj, deleter), pitch / sizeof(T));
 }
 
+/**
+ * @brief Function to allocate a pitched memory buffer on the GPU.
+ * @param data The pointer to the allocated memory.
+ * @param width The width of the buffer.
+ * @param height The height of the buffer.
+ * @param pitch The pitch of the buffer.
+ */
+template <typename T>
+struct PitchedMalloc {
+  public:
+    using value_type = T;
+    PitchedMalloc(std::shared_ptr<T[]> data, size_t width, size_t height, size_t pitch)
+        : _data(data), width(width), height(height), pitch(pitch) {}
+
+    PitchedMalloc(size_t width, size_t height) : width(width), height(height) {
+        auto [alloc, alloc_pitch] = make_cuda_pitched_malloc<T>(width, height);
+        _data = alloc;
+        pitch = alloc_pitch;
+    }
+
+    auto get() {
+        return _data.get();
+    }
+    auto size_bytes() -> size_t const {
+        return pitch * height * sizeof(T);
+    }
+    auto pitch_bytes() -> size_t const {
+        return pitch * sizeof(T);
+    }
+
+    std::shared_ptr<T[]> _data;
+    size_t width;
+    size_t height;
+    size_t pitch;
+};
+
 class CudaStream {
     cudaStream_t _stream;
 
@@ -409,7 +445,7 @@ void save_device_data_to_txt(PixelType *device_ptr,
     for (int y = 0, k = 0; y < height; ++y) {
         for (int x = 0; x < width; ++x, ++k) {
             if (condition_func(host_data[k])) {
-                out.print("{}, {}\n", x, y);
+                out.print("{}, {}, {}\n", x, y, host_data[k]);
             }
         }
     }

diff --git a/spotfinder/CMakeLists.txt b/spotfinder/CMakeLists.txt
@@ -12,6 +12,8 @@ add_executable(spotfinder
     shmread.cc
     cbfread.cc
     kernels/masking.cu
+    kernels/thresholding.cu
+    kernels/erosion.cu
 )
 target_link_libraries(spotfinder
     PRIVATE

diff --git a/spotfinder/kernels/erosion.cu b/spotfinder/kernels/erosion.cu
@@ -0,0 +1,114 @@
+/**
+ * @file erosion.cu
+ * @brief Contains the CUDA kernel implementation for performing
+ *        morphological erosion on an image using a given kernel radius
+ *        and Chebyshev distance threshold.
+ *
+ * This kernel processes a dispersion mask containing potential signal
+ * spots and background pixels. The kernel processes each pixel in the
+ * mask and iterates over its local neighbourhood defined by a given
+ * kernel radius. The kernel then checks if each pixel is within a given
+ * Chebyshev distance threshold of a background pixel. If the pixel is
+ * within the threshold, it is considered part of the background and is
+ * marked as a background pixel in the erosion mask. Therefore eroding
+ * the edges of the signal spots.
+ * 
+ * @note The __restrict__ keyword is used to indicate to the compiler
+ *       that the two pointers are not aliased, allowing the compiler to
+ *       perform more aggressive optimizations.
+ */
+
+#include <cooperative_groups.h>
+#include <cooperative_groups/reduce.h>
+
+#include "cuda_common.hpp"
+#include "erosion.cuh"
+
+namespace cg = cooperative_groups;
+
+#pragma region Erosion kernel
+__global__ void erosion(uint8_t __restrict__ *dispersion_mask,
+                        uint8_t __restrict__ *erosion_mask,
+                        uint8_t __restrict__ *mask,
+                        size_t dispersion_mask_pitch,
+                        size_t erosion_mask_pitch,
+                        size_t mask_pitch,
+                        int width,
+                        int height,
+                        uint8_t radius) {
+    // Calculate the pixel coordinates
+    auto block = cg::this_thread_block();
+    int x = block.group_index().x * block.group_dim().x + block.thread_index().x;
+    int y = block.group_index().y * block.group_dim().y + block.thread_index().y;
+
+    // Guards
+    if (x >= width || y >= height) return;  // Out of bounds guard
+
+    bool is_background = dispersion_mask[y * dispersion_mask_pitch + x] == MASKED_PIXEL;
+    if (is_background) {
+        /*
+         * If the pixel is masked, we want to set it to VALID_PIXEL
+         * in order to invert the mask.
+        */
+        erosion_mask[y * erosion_mask_pitch + x] = VALID_PIXEL;
+        return;
+    }
+
+    // Calculate the bounds of the erosion kernel
+    int x_start = max(0, x - radius);
+    int x_end = min(x + radius + 1, width);
+    int y_start = max(0, y - radius);
+    int y_end = min(y + radius + 1, height);
+
+    bool should_erase = false;  // Flag to determine if the pixel should be erased
+    constexpr uint8_t chebyshev_distance_threshold = 2;
+
+    // Iterate over the kernel bounds
+    for (int kernel_x = x_start; kernel_x < x_end; ++kernel_x) {
+        for (int kernel_y = y_start; kernel_y < y_end; ++kernel_y) {
+            /*
+             * TODO: Investigate whether we should be doing this or not!
+             * Intuition says that we should be considering the mask,
+             * however DIALS does not do this. May be a bug, may be on
+             * purpose? Investigate!
+            */
+            // if (mask[kernel_y * mask_pitch + kernel_x] == 0) {
+            //     continue;
+            // }
+            if (dispersion_mask[kernel_y * dispersion_mask_pitch + kernel_x]
+                == MASKED_PIXEL) {
+                // If the current pixel is background, check the Chebyshev distance
+                uint8_t chebyshev_distance = max(abs(kernel_x - x), abs(kernel_y - y));
+
+                if (chebyshev_distance <= chebyshev_distance_threshold) {
+                    // If a background pixel is too close, the current pixel should be erased
+                    should_erase = true;
+                    // We can then break out of the loop, as no further checks are necessary
+                    goto termination;
+                }
+            }
+        }
+    }
+
+termination:
+    if (should_erase) {
+        /*
+         * Erase the pixel from the background mask. This is done by setting the pixel
+         * as valid (i.e. not masked) in the erosion_mask data. This allows the pixel to be
+         * considered as a background pixel in the background calculation as it is not
+         * considered part of the signal.
+        */
+        erosion_mask[y * erosion_mask_pitch + x] = VALID_PIXEL;
+    } else {
+        /*
+         * If the pixel should not be erased, this means that it is part of the signal.
+         * and needs to be marked as masked in the erosion_mask data. This prevents the pixel
+         * from being considered as part of the background in the background calculation.
+        */
+
+        // Invert 'valid' signal spot to 'masked' background spots
+        erosion_mask[y * erosion_mask_pitch + x] =
+          !dispersion_mask[y * dispersion_mask_pitch + x];
+    }
+}
+#pragma enregion Erosion kernel
diff --git a/spotfinder/kernels/erosion.cuh b/spotfinder/kernels/erosion.cuh
@@ -0,0 +1,11 @@
+#pragma once
+
+__global__ void erosion(uint8_t __restrict__ *dispersion_mask,
+                               uint8_t __restrict__ *erosion_mask,
+                               uint8_t __restrict__ *mask,
+                               size_t dispersion_mask_pitch,
+                               size_t erosion_mask_pitch,
+                               size_t mask_pitch,
+                               int width,
+                               int height,
+                               uint8_t radius);
diff --git a/spotfinder/kernels/masking.cu b/spotfinder/kernels/masking.cu
@@ -33,18 +33,21 @@
  * @param pixel_size_y The pixel size of the detector in the y-direction in m
  * @return The calculated distance from the beam center in m
 */
-__device__ float get_distance_from_centre(float x,
+__device__ float get_distance_from_center(float x,
                                           float y,
-                                          float centre_x,
-                                          float centre_y,
+                                          float center_x,
+                                          float center_y,
                                           float pixel_size_x,
                                           float pixel_size_y) {
     /*
      * Since this calculation is for a broad, general exclusion, we can
      * use basic Pythagoras to calculate the distance from the center.
+     * 
+     * We add 0.5 in order to move to the center of the pixel. Since
+     * starting at 0, 0, for instance, would infact be the corner.
     */
-    float dx = (x - centre_x) * pixel_size_x;
-    float dy = (y - centre_y) * pixel_size_y;
+    float dx = ((x + 0.5f) - center_x) * pixel_size_x;
+    float dy = ((y + 0.5f) - center_y) * pixel_size_y;
     return sqrtf(dx * dx + dy * dy);
 }
 
@@ -117,7 +120,7 @@ __global__ void apply_resolution_mask(uint8_t *mask,
         return;
     }
 
-    float distance_from_centre = get_distance_from_centre(
+    float distance_from_centre = get_distance_from_center(
       x, y, beam_center_x, beam_center_y, pixel_size_x, pixel_size_y);
     float resolution =
       get_resolution(wavelength, distance_to_detector, distance_from_centre);