Skip to content

Commit

Permalink
memory: Drop support for managed memory
Browse files Browse the repository at this point in the history
  • Loading branch information
stotko committed Nov 19, 2024
1 parent 9005240 commit b751532
Show file tree
Hide file tree
Showing 12 changed files with 13 additions and 583 deletions.
7 changes: 0 additions & 7 deletions benchmarks/stdgpu/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,6 @@ main(int argc, char* argv[])
stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host),
stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host));
printf("| Managed %6" STDGPU_PRIINDEX64 " / %6" STDGPU_PRIINDEX64 " (%6" STDGPU_PRIINDEX64
") |\n",
stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed),
stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed),
stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed) -
stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed));
printf("+---------------------------------------------------------+\n");

return EXIT_SUCCESS;
}
39 changes: 3 additions & 36 deletions src/stdgpu/cuda/impl/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
}
break;

case dynamic_memory_type::managed:
{
STDGPU_CUDA_SAFE_CALL(cudaMallocManaged(array, static_cast<std::size_t>(bytes)));
}
break;

case dynamic_memory_type::invalid:
default:
{
Expand All @@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
}
break;

case dynamic_memory_type::managed:
{
STDGPU_CUDA_SAFE_CALL(cudaFree(array));
}
break;

case dynamic_memory_type::invalid:
default:
{
Expand All @@ -95,18 +83,15 @@ memcpy(void* destination,
{
cudaMemcpyKind kind;

if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
(source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
{
kind = cudaMemcpyDeviceToDevice;
}
else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
source_type == dynamic_memory_type::host)
else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
{
kind = cudaMemcpyHostToDevice;
}
else if (destination_type == dynamic_memory_type::host &&
(source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
{
kind = cudaMemcpyDeviceToHost;
}
Expand All @@ -123,22 +108,4 @@ memcpy(void* destination,
STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
}

void
workaround_synchronize_managed_memory()
{
// We need to synchronize the whole device before accessing managed memory on pre-Pascal GPUs
int current_device;
int hash_concurrent_managed_access;
STDGPU_CUDA_SAFE_CALL(cudaGetDevice(&current_device));
STDGPU_CUDA_SAFE_CALL(cudaDeviceGetAttribute(&hash_concurrent_managed_access,
cudaDevAttrConcurrentManagedAccess,
current_device));
if (hash_concurrent_managed_access == 0)
{
printf("stdgpu::cuda::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
"the data on the host ...\n");
STDGPU_CUDA_SAFE_CALL(cudaDeviceSynchronize());
}
}

} // namespace stdgpu::cuda
6 changes: 0 additions & 6 deletions src/stdgpu/cuda/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ memcpy(void* destination,
dynamic_memory_type destination_type,
dynamic_memory_type source_type);

/**
* \brief Workarounds a synchronization issue with older GPUs
*/
void
workaround_synchronize_managed_memory();

} // namespace stdgpu::cuda

#endif // STDGPU_CUDA_MEMORY_H
39 changes: 3 additions & 36 deletions src/stdgpu/hip/impl/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
}
break;

case dynamic_memory_type::managed:
{
STDGPU_HIP_SAFE_CALL(hipMallocManaged(array, static_cast<std::size_t>(bytes)));
}
break;

case dynamic_memory_type::invalid:
default:
{
Expand All @@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
}
break;

case dynamic_memory_type::managed:
{
STDGPU_HIP_SAFE_CALL(hipFree(array));
}
break;

case dynamic_memory_type::invalid:
default:
{
Expand All @@ -95,18 +83,15 @@ memcpy(void* destination,
{
hipMemcpyKind kind;

if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
(source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
{
kind = hipMemcpyDeviceToDevice;
}
else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
source_type == dynamic_memory_type::host)
else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
{
kind = hipMemcpyHostToDevice;
}
else if (destination_type == dynamic_memory_type::host &&
(source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
{
kind = hipMemcpyDeviceToHost;
}
Expand All @@ -123,22 +108,4 @@ memcpy(void* destination,
STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
}

void
workaround_synchronize_managed_memory()
{
// We need to synchronize the whole device before accessing managed memory on old GPUs
int current_device;
int has_concurrent_managed_access;
STDGPU_HIP_SAFE_CALL(hipGetDevice(&current_device));
STDGPU_HIP_SAFE_CALL(hipDeviceGetAttribute(&has_concurrent_managed_access,
hipDeviceAttributeConcurrentManagedAccess,
current_device));
if (has_concurrent_managed_access == 0)
{
printf("stdgpu::hip::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
"the data on the host ...\n");
STDGPU_HIP_SAFE_CALL(hipDeviceSynchronize());
}
}

} // namespace stdgpu::hip
6 changes: 0 additions & 6 deletions src/stdgpu/hip/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ memcpy(void* destination,
dynamic_memory_type destination_type,
dynamic_memory_type source_type);

/**
* \brief Workarounds a synchronization issue with older GPUs
*/
void
workaround_synchronize_managed_memory();

} // namespace stdgpu::hip

#endif // STDGPU_HIP_MEMORY_H
29 changes: 2 additions & 27 deletions src/stdgpu/impl/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,12 +239,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
return manager_host;
}

case dynamic_memory_type::managed:
{
static memory_manager manager_managed;
return manager_managed;
}

case dynamic_memory_type::invalid:
default:
{
Expand All @@ -255,12 +249,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
}
}

void
workaround_synchronize_managed_memory()
{
stdgpu::STDGPU_BACKEND_NAMESPACE::workaround_synchronize_managed_memory();
}

[[nodiscard]] void*
allocate(index64_t bytes, dynamic_memory_type type)
{
Expand Down Expand Up @@ -310,15 +298,12 @@ memcpy(void* destination,
{
if (!external_memory)
{
if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes) &&
!dispatch_allocation_manager(dynamic_memory_type::managed).contains_submemory(destination, bytes))
if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes))
{
printf("stdgpu::detail::memcpy : Copying to unknown destination pointer not possible\n");
return;
}
if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes) &&
!dispatch_allocation_manager(dynamic_memory_type::managed)
.contains_submemory(const_cast<void*>(source), bytes))
if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes))
{
printf("stdgpu::detail::memcpy : Copying from unknown source pointer not possible\n");
return;
Expand All @@ -345,12 +330,6 @@ dispatch_size_manager(const dynamic_memory_type type)
return manager_host;
}

case dynamic_memory_type::managed:
{
static memory_manager manager_managed;
return manager_managed;
}

case dynamic_memory_type::invalid:
default:
{
Expand All @@ -375,10 +354,6 @@ get_dynamic_memory_type(void* array)
{
return dynamic_memory_type::host;
}
if (detail::dispatch_size_manager(dynamic_memory_type::managed).contains_memory(array))
{
return dynamic_memory_type::managed;
}

return dynamic_memory_type::invalid;
}
Expand Down
100 changes: 0 additions & 100 deletions src/stdgpu/impl/memory_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,6 @@ unoptimized_destroy(ExecutionPolicy&& policy, Iterator first, Iterator last)
destroy_functor<Iterator>(first));
}

void
workaround_synchronize_managed_memory();

} // namespace stdgpu::detail

template <typename T>
Expand Down Expand Up @@ -179,61 +176,6 @@ createHostArray(const stdgpu::index64_t count, const T default_value)
return host_array;
}

template <typename T>
T*
createManagedArray(const stdgpu::index64_t count, const T default_value, const Initialization initialize_on)
{
using Allocator = stdgpu::safe_managed_allocator<T>;
Allocator managed_allocator;

T* managed_array = stdgpu::allocator_traits<Allocator>::allocate(managed_allocator, count);

if (managed_array == nullptr)
{
printf("createManagedArray : Failed to allocate array. Aborting ...\n");
return nullptr;
}

switch (initialize_on)
{
#if STDGPU_DETAIL_IS_DEVICE_COMPILED
case Initialization::DEVICE:
{
stdgpu::uninitialized_fill(stdgpu::execution::device,
stdgpu::device_begin(managed_array),
stdgpu::device_end(managed_array),
default_value);
}
break;
#else
case Initialization::DEVICE:
{
// Same as host path
}
[[fallthrough]];
#endif

case Initialization::HOST:
{
stdgpu::detail::workaround_synchronize_managed_memory();

stdgpu::uninitialized_fill(stdgpu::execution::host,
stdgpu::host_begin(managed_array),
stdgpu::host_end(managed_array),
default_value);
}
break;

default:
{
printf("createManagedArray : Invalid initialization device. Returning created but uninitialized array "
"...\n");
}
}

return managed_array;
}

template <typename T>
void
destroyDeviceArray(T*& device_array)
Expand Down Expand Up @@ -275,21 +217,6 @@ destroyHostArray(T*& host_array)
host_array = nullptr;
}

template <typename T>
void
destroyManagedArray(T*& managed_array)
{
using Allocator = stdgpu::safe_managed_allocator<T>;
Allocator managed_allocator;

// Call on host since the initialization place is not known
stdgpu::allocator_traits<Allocator>::deallocate_filled(stdgpu::execution::host,
managed_allocator,
managed_array,
stdgpu::size(managed_array));
managed_array = nullptr;
}

template <typename T>
T*
copyCreateDevice2HostArray(const T* device_array, const stdgpu::index64_t count, const MemoryCopy check_safety)
Expand Down Expand Up @@ -565,33 +492,6 @@ safe_host_allocator<T>::deallocate(T* p, index64_t n)
memory_type);
}

template <typename T>
template <typename U>
safe_managed_allocator<T>::safe_managed_allocator([[maybe_unused]] const safe_managed_allocator<U>& other) noexcept
{
}

template <typename T>
[[nodiscard]] T*
safe_managed_allocator<T>::allocate(index64_t n)
{
T* p = static_cast<T*>(
detail::allocate(n * static_cast<index64_t>(sizeof(T)), memory_type)); // NOLINT(bugprone-sizeof-expression)
register_memory(p, n, memory_type);
return p;
}

template <typename T>
void
safe_managed_allocator<T>::deallocate(T* p, index64_t n)
{
deregister_memory(p, n, memory_type);
// NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
detail::deallocate(static_cast<void*>(const_cast<std::remove_cv_t<T>*>(p)),
n * static_cast<index64_t>(sizeof(T)), // NOLINT(bugprone-sizeof-expression)
memory_type);
}

template <typename Allocator>
typename allocator_traits<Allocator>::pointer
allocator_traits<Allocator>::allocate(Allocator& a, typename allocator_traits<Allocator>::index_type n)
Expand Down
Loading

0 comments on commit b751532

Please sign in to comment.