From c8a56f3060fa8e8323edee3fb958ea595db692fe Mon Sep 17 00:00:00 2001 From: Patrick Stotko Date: Tue, 19 Nov 2024 10:50:39 +0100 Subject: [PATCH] memory: Make backend interface more low level --- src/stdgpu/cuda/impl/error.h | 1 + src/stdgpu/cuda/impl/memory.cpp | 102 +++++++++--------------------- src/stdgpu/cuda/memory.h | 62 ++++++++++++++---- src/stdgpu/hip/impl/error.h | 1 + src/stdgpu/hip/impl/memory.cpp | 102 +++++++++--------------------- src/stdgpu/hip/memory.h | 62 ++++++++++++++---- src/stdgpu/impl/memory.cpp | 65 ++++++++++++++++++- src/stdgpu/openmp/impl/memory.cpp | 76 ++++++++++------------ src/stdgpu/openmp/memory.h | 62 ++++++++++++++---- 9 files changed, 305 insertions(+), 228 deletions(-) diff --git a/src/stdgpu/cuda/impl/error.h b/src/stdgpu/cuda/impl/error.h index 4246ab774..6058147f3 100644 --- a/src/stdgpu/cuda/impl/error.h +++ b/src/stdgpu/cuda/impl/error.h @@ -16,6 +16,7 @@ #ifndef STDGPU_CUDA_ERROR_H #define STDGPU_CUDA_ERROR_H +#include #include #include diff --git a/src/stdgpu/cuda/impl/memory.cpp b/src/stdgpu/cuda/impl/memory.cpp index 019e58535..8c815bd51 100644 --- a/src/stdgpu/cuda/impl/memory.cpp +++ b/src/stdgpu/cuda/impl/memory.cpp @@ -15,97 +15,57 @@ #include -#include - #include namespace stdgpu::cuda { void -malloc(const dynamic_memory_type type, void** array, index64_t bytes) +malloc_device(void** array, index64_t bytes) { - switch (type) - { - case dynamic_memory_type::device: - { - STDGPU_CUDA_SAFE_CALL(cudaMalloc(array, static_cast(bytes))); - } - break; - - case dynamic_memory_type::host: - { - STDGPU_CUDA_SAFE_CALL(cudaMallocHost(array, static_cast(bytes))); - } - break; + STDGPU_CUDA_SAFE_CALL(cudaMalloc(array, static_cast(bytes))); +} - case dynamic_memory_type::invalid: - default: - { - printf("stdgpu::cuda::malloc : Unsupported dynamic memory type\n"); - return; - } - } +void +malloc_host(void** array, index64_t bytes) +{ + STDGPU_CUDA_SAFE_CALL(cudaMallocHost(array, static_cast(bytes))); } void -free(const dynamic_memory_type type, void* array) +free_device(void* array) { - switch (type) - { - case dynamic_memory_type::device: - { - STDGPU_CUDA_SAFE_CALL(cudaFree(array)); - } - break; + STDGPU_CUDA_SAFE_CALL(cudaFree(array)); +} - case dynamic_memory_type::host: - { - STDGPU_CUDA_SAFE_CALL(cudaFreeHost(array)); - } - break; +void +free_host(void* array) +{ + STDGPU_CUDA_SAFE_CALL(cudaFreeHost(array)); +} - case dynamic_memory_type::invalid: - default: - { - printf("stdgpu::cuda::free : Unsupported dynamic memory type\n"); - return; - } - } +void +memcpy_device_to_device(void* destination, const void* source, index64_t bytes) +{ + STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast(bytes), cudaMemcpyDeviceToDevice)); } void -memcpy(void* destination, - const void* source, - index64_t bytes, - dynamic_memory_type destination_type, - dynamic_memory_type source_type) +memcpy_device_to_host(void* destination, const void* source, index64_t bytes) { - cudaMemcpyKind kind; + STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast(bytes), cudaMemcpyDeviceToHost)); +} - if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device) - { - kind = cudaMemcpyDeviceToDevice; - } - else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host) - { - kind = cudaMemcpyHostToDevice; - } - else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device) - { - kind = cudaMemcpyDeviceToHost; - } - else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::host) - { - kind = cudaMemcpyHostToHost; - } - else - { - printf("stdgpu::cuda::memcpy : Unsupported dynamic source or destination memory type\n"); - return; - } +void +memcpy_host_to_device(void* destination, const void* source, index64_t bytes) +{ + STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast(bytes), cudaMemcpyHostToDevice)); +} - STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast(bytes), kind)); +void +memcpy_host_to_host(void* destination, const void* source, index64_t bytes) +{ + STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast(bytes), cudaMemcpyHostToHost)); } } // namespace stdgpu::cuda diff --git a/src/stdgpu/cuda/memory.h b/src/stdgpu/cuda/memory.h index 72c97ffb9..7c6af7a17 100644 --- a/src/stdgpu/cuda/memory.h +++ b/src/stdgpu/cuda/memory.h @@ -17,42 +17,78 @@ #define STDGPU_CUDA_MEMORY_H #include -#include namespace stdgpu::cuda { /** - * \brief Performs platform-specific memory allocation + * \brief Performs platform-specific memory allocation on the device + * \param[in] array A pointer to the allocated array + * \param[in] bytes The size of the allocated array + */ +void +malloc_device(void** array, index64_t bytes); + +/** + * \brief Performs platform-specific memory allocation on the host * \param[in] type The type of the memory to allocate * \param[in] array A pointer to the allocated array * \param[in] bytes The size of the allocated array */ void -malloc(const dynamic_memory_type type, void** array, index64_t bytes); +malloc_host(void** array, index64_t bytes); /** - * \brief Performs platform-specific memory deallocation + * \brief Performs platform-specific memory deallocation on the device * \param[in] type The type of the memory to deallocate * \param[in] array The allocated array */ void -free(const dynamic_memory_type type, void* array); +free_device(void* array); + +/** + * \brief Performs platform-specific memory deallocation on the host + * \param[in] type The type of the memory to deallocate + * \param[in] array The allocated array + */ +void +free_host(void* array); + +/** + * \brief Performs platform-specific memory copy from device to device + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_device_to_device(void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from device to host + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_device_to_host(void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to device + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_host_to_device(void* destination, const void* source, index64_t bytes); /** - * \brief Performs platform-specific memory copy + * \brief Performs platform-specific memory copy from host to host * \param[in] destination The destination array * \param[in] source The source array * \param[in] bytes The size of the allocated array - * \param[in] destination_type The type of the destination array - * \param[in] source_type The type of the source array */ void -memcpy(void* destination, - const void* source, - index64_t bytes, - dynamic_memory_type destination_type, - dynamic_memory_type source_type); +memcpy_host_to_host(void* destination, const void* source, index64_t bytes); } // namespace stdgpu::cuda diff --git a/src/stdgpu/hip/impl/error.h b/src/stdgpu/hip/impl/error.h index 1fb6d4d5a..8953fdaa2 100644 --- a/src/stdgpu/hip/impl/error.h +++ b/src/stdgpu/hip/impl/error.h @@ -16,6 +16,7 @@ #ifndef STDGPU_HIP_ERROR_H #define STDGPU_HIP_ERROR_H +#include #include #include diff --git a/src/stdgpu/hip/impl/memory.cpp b/src/stdgpu/hip/impl/memory.cpp index 55a9aa557..6dab8040c 100644 --- a/src/stdgpu/hip/impl/memory.cpp +++ b/src/stdgpu/hip/impl/memory.cpp @@ -15,97 +15,57 @@ #include -#include - #include namespace stdgpu::hip { void -malloc(const dynamic_memory_type type, void** array, index64_t bytes) +malloc_device(void** array, index64_t bytes) { - switch (type) - { - case dynamic_memory_type::device: - { - STDGPU_HIP_SAFE_CALL(hipMalloc(array, static_cast(bytes))); - } - break; - - case dynamic_memory_type::host: - { - STDGPU_HIP_SAFE_CALL(hipHostMalloc(array, static_cast(bytes))); - } - break; + STDGPU_HIP_SAFE_CALL(hipMalloc(array, static_cast(bytes))); +} - case dynamic_memory_type::invalid: - default: - { - printf("stdgpu::hip::malloc : Unsupported dynamic memory type\n"); - return; - } - } +void +malloc_host(void** array, index64_t bytes) +{ + STDGPU_HIP_SAFE_CALL(hipHostMalloc(array, static_cast(bytes))); } void -free(const dynamic_memory_type type, void* array) +free_device(void* array) { - switch (type) - { - case dynamic_memory_type::device: - { - STDGPU_HIP_SAFE_CALL(hipFree(array)); - } - break; + STDGPU_HIP_SAFE_CALL(hipFree(array)); +} - case dynamic_memory_type::host: - { - STDGPU_HIP_SAFE_CALL(hipHostFree(array)); - } - break; +void +free_host(void* array) +{ + STDGPU_HIP_SAFE_CALL(hipHostFree(array)); +} - case dynamic_memory_type::invalid: - default: - { - printf("stdgpu::hip::free : Unsupported dynamic memory type\n"); - return; - } - } +void +memcpy_device_to_device(void* destination, const void* source, index64_t bytes) +{ + STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast(bytes), hipMemcpyDeviceToDevice)); } void -memcpy(void* destination, - const void* source, - index64_t bytes, - dynamic_memory_type destination_type, - dynamic_memory_type source_type) +memcpy_device_to_host(void* destination, const void* source, index64_t bytes) { - hipMemcpyKind kind; + STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast(bytes), hipMemcpyDeviceToHost)); +} - if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device) - { - kind = hipMemcpyDeviceToDevice; - } - else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host) - { - kind = hipMemcpyHostToDevice; - } - else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device) - { - kind = hipMemcpyDeviceToHost; - } - else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::host) - { - kind = hipMemcpyHostToHost; - } - else - { - printf("stdgpu::hip::memcpy : Unsupported dynamic source or destination memory type\n"); - return; - } +void +memcpy_host_to_device(void* destination, const void* source, index64_t bytes) +{ + STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast(bytes), hipMemcpyHostToDevice)); +} - STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast(bytes), kind)); +void +memcpy_host_to_host(void* destination, const void* source, index64_t bytes) +{ + STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast(bytes), hipMemcpyHostToHost)); } } // namespace stdgpu::hip diff --git a/src/stdgpu/hip/memory.h b/src/stdgpu/hip/memory.h index c4b45394b..9c215c49e 100644 --- a/src/stdgpu/hip/memory.h +++ b/src/stdgpu/hip/memory.h @@ -17,42 +17,78 @@ #define STDGPU_HIP_MEMORY_H #include -#include namespace stdgpu::hip { /** - * \brief Performs platform-specific memory allocation + * \brief Performs platform-specific memory allocation on the device + * \param[in] array A pointer to the allocated array + * \param[in] bytes The size of the allocated array + */ +void +malloc_device(void** array, index64_t bytes); + +/** + * \brief Performs platform-specific memory allocation on the host * \param[in] type The type of the memory to allocate * \param[in] array A pointer to the allocated array * \param[in] bytes The size of the allocated array */ void -malloc(const dynamic_memory_type type, void** array, index64_t bytes); +malloc_host(void** array, index64_t bytes); /** - * \brief Performs platform-specific memory deallocation + * \brief Performs platform-specific memory deallocation on the device * \param[in] type The type of the memory to deallocate * \param[in] array The allocated array */ void -free(const dynamic_memory_type type, void* array); +free_device(void* array); + +/** + * \brief Performs platform-specific memory deallocation on the host + * \param[in] type The type of the memory to deallocate + * \param[in] array The allocated array + */ +void +free_host(void* array); + +/** + * \brief Performs platform-specific memory copy from device to device + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_device_to_device(void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from device to host + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_device_to_host(void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to device + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_host_to_device(void* destination, const void* source, index64_t bytes); /** - * \brief Performs platform-specific memory copy + * \brief Performs platform-specific memory copy from host to host * \param[in] destination The destination array * \param[in] source The source array * \param[in] bytes The size of the allocated array - * \param[in] destination_type The type of the destination array - * \param[in] source_type The type of the source array */ void -memcpy(void* destination, - const void* source, - index64_t bytes, - dynamic_memory_type destination_type, - dynamic_memory_type source_type); +memcpy_host_to_host(void* destination, const void* source, index64_t bytes); } // namespace stdgpu::hip diff --git a/src/stdgpu/impl/memory.cpp b/src/stdgpu/impl/memory.cpp index 10864ba3b..6d67bd82f 100644 --- a/src/stdgpu/impl/memory.cpp +++ b/src/stdgpu/impl/memory.cpp @@ -259,8 +259,27 @@ allocate(index64_t bytes, dynamic_memory_type type) } void* array = nullptr; + switch (type) + { + case dynamic_memory_type::device: + { + stdgpu::STDGPU_BACKEND_NAMESPACE::malloc_device(&array, bytes); + } + break; - stdgpu::STDGPU_BACKEND_NAMESPACE::malloc(type, &array, bytes); + case dynamic_memory_type::host: + { + stdgpu::STDGPU_BACKEND_NAMESPACE::malloc_host(&array, bytes); + } + break; + + case dynamic_memory_type::invalid: + default: + { + printf("stdgpu::detail::allocate : Unsupported dynamic memory type\n"); + return nullptr; + } + } // Update pointer management after allocation dispatch_allocation_manager(type).register_memory(array, bytes); @@ -285,7 +304,27 @@ deallocate(void* p, index64_t bytes, dynamic_memory_type type) // Update pointer management before freeing dispatch_allocation_manager(type).deregister_memory(p, bytes); - stdgpu::STDGPU_BACKEND_NAMESPACE::free(type, p); + switch (type) + { + case dynamic_memory_type::device: + { + stdgpu::STDGPU_BACKEND_NAMESPACE::free_device(p); + } + break; + + case dynamic_memory_type::host: + { + stdgpu::STDGPU_BACKEND_NAMESPACE::free_host(p); + } + break; + + case dynamic_memory_type::invalid: + default: + { + printf("stdgpu::detail::deallocate : Unsupported dynamic memory type\n"); + return; + } + } } void @@ -310,7 +349,27 @@ memcpy(void* destination, } } - stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy(destination, source, bytes, destination_type, source_type); + if (source_type == dynamic_memory_type::device && destination_type == dynamic_memory_type::device) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_device_to_device(destination, source, bytes); + } + else if (source_type == dynamic_memory_type::device && destination_type == dynamic_memory_type::host) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_device_to_host(destination, source, bytes); + } + else if (source_type == dynamic_memory_type::host && destination_type == dynamic_memory_type::device) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_host_to_device(destination, source, bytes); + } + else if (source_type == dynamic_memory_type::host && destination_type == dynamic_memory_type::host) + { + stdgpu::STDGPU_BACKEND_NAMESPACE::memcpy_host_to_host(destination, source, bytes); + } + else + { + printf("stdgpu::detail::memcpy : Unsupported dynamic source or destination memory type\n"); + return; + } } memory_manager& diff --git a/src/stdgpu/openmp/impl/memory.cpp b/src/stdgpu/openmp/impl/memory.cpp index 25471e8bb..862dd92ef 100644 --- a/src/stdgpu/openmp/impl/memory.cpp +++ b/src/stdgpu/openmp/impl/memory.cpp @@ -15,7 +15,6 @@ #include -#include #include #include @@ -23,61 +22,50 @@ namespace stdgpu::openmp { void -malloc(const dynamic_memory_type type, void** array, index64_t bytes) +malloc_device(void** array, index64_t bytes) { - switch (type) - { - case dynamic_memory_type::device: - case dynamic_memory_type::host: - { - *array = - std::malloc(static_cast(bytes)); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc) - } - break; + *array = std::malloc(static_cast(bytes)); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc) +} + +void +malloc_host(void** array, index64_t bytes) +{ + *array = std::malloc(static_cast(bytes)); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc) +} - case dynamic_memory_type::invalid: - default: - { - printf("stdgpu::openmp::malloc : Unsupported dynamic memory type\n"); - return; - } - } +void +free_device(void* array) +{ + std::free(array); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc) +} + +void +free_host(void* array) +{ + std::free(array); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc) } void -free(const dynamic_memory_type type, void* array) +memcpy_device_to_device(void* destination, const void* source, index64_t bytes) { - switch (type) - { - case dynamic_memory_type::device: - case dynamic_memory_type::host: - { - std::free(array); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc) - } - break; + std::memcpy(destination, source, static_cast(bytes)); +} - case dynamic_memory_type::invalid: - default: - { - printf("stdgpu::openmp::free : Unsupported dynamic memory type\n"); - return; - } - } +void +memcpy_device_to_host(void* destination, const void* source, index64_t bytes) +{ + std::memcpy(destination, source, static_cast(bytes)); } void -memcpy(void* destination, - const void* source, - index64_t bytes, - dynamic_memory_type destination_type, - dynamic_memory_type source_type) +memcpy_host_to_device(void* destination, const void* source, index64_t bytes) { - if (destination_type == dynamic_memory_type::invalid || source_type == dynamic_memory_type::invalid) - { - printf("stdgpu::openmp::memcpy : Unsupported dynamic source or destination memory type\n"); - return; - } + std::memcpy(destination, source, static_cast(bytes)); +} +void +memcpy_host_to_host(void* destination, const void* source, index64_t bytes) +{ std::memcpy(destination, source, static_cast(bytes)); } diff --git a/src/stdgpu/openmp/memory.h b/src/stdgpu/openmp/memory.h index 0019cac25..130ec9d7b 100644 --- a/src/stdgpu/openmp/memory.h +++ b/src/stdgpu/openmp/memory.h @@ -17,42 +17,78 @@ #define STDGPU_OPENMP_MEMORY_H #include -#include namespace stdgpu::openmp { /** - * \brief Performs platform-specific memory allocation + * \brief Performs platform-specific memory allocation on the device + * \param[in] array A pointer to the allocated array + * \param[in] bytes The size of the allocated array + */ +void +malloc_device(void** array, index64_t bytes); + +/** + * \brief Performs platform-specific memory allocation on the host * \param[in] type The type of the memory to allocate * \param[in] array A pointer to the allocated array * \param[in] bytes The size of the allocated array */ void -malloc(const dynamic_memory_type type, void** array, index64_t bytes); +malloc_host(void** array, index64_t bytes); /** - * \brief Performs platform-specific memory deallocation + * \brief Performs platform-specific memory deallocation on the device * \param[in] type The type of the memory to deallocate * \param[in] array The allocated array */ void -free(const dynamic_memory_type type, void* array); +free_device(void* array); + +/** + * \brief Performs platform-specific memory deallocation on the host + * \param[in] type The type of the memory to deallocate + * \param[in] array The allocated array + */ +void +free_host(void* array); + +/** + * \brief Performs platform-specific memory copy from device to device + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_device_to_device(void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from device to host + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_device_to_host(void* destination, const void* source, index64_t bytes); + +/** + * \brief Performs platform-specific memory copy from host to device + * \param[in] destination The destination array + * \param[in] source The source array + * \param[in] bytes The size of the allocated array + */ +void +memcpy_host_to_device(void* destination, const void* source, index64_t bytes); /** - * \brief Performs platform-specific memory copy + * \brief Performs platform-specific memory copy from host to host * \param[in] destination The destination array * \param[in] source The source array * \param[in] bytes The size of the allocated array - * \param[in] destination_type The type of the destination array - * \param[in] source_type The type of the source array */ void -memcpy(void* destination, - const void* source, - index64_t bytes, - dynamic_memory_type destination_type, - dynamic_memory_type source_type); +memcpy_host_to_host(void* destination, const void* source, index64_t bytes); } // namespace stdgpu::openmp