memory: Drop support for managed memory #445

Merged: 1 commit, Nov 19, 2024
7 changes: 0 additions & 7 deletions benchmarks/stdgpu/main.cpp
@@ -65,13 +65,6 @@ main(int argc, char* argv[])
            stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host),
            stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
            stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host));
-    printf("| Managed %6" STDGPU_PRIINDEX64 " / %6" STDGPU_PRIINDEX64 " (%6" STDGPU_PRIINDEX64
-           ") |\n",
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed) -
-           stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed));
     printf("+---------------------------------------------------------+\n");

     return EXIT_SUCCESS;
 }
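Aside: the leak report printed by the benchmark now tracks only the host and device pools. The counters can also be queried directly; a minimal sketch, where the helper name is hypothetical and the header path is assumed, while get_allocation_count/get_deallocation_count appear verbatim in this diff:

#include <stdgpu/memory.h>

// Hypothetical helper (not part of this PR): allocations minus
// deallocations yields the number of currently live host allocations.
stdgpu::index64_t live_host_allocations()
{
    return stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
           stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host);
}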
39 changes: 3 additions & 36 deletions src/stdgpu/cuda/impl/memory.cpp
@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     cudaMemcpyKind kind;

-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = cudaMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@
     STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }

-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on pre-Pascal GPUs
-    int current_device;
-    int hash_concurrent_managed_access;
-    STDGPU_CUDA_SAFE_CALL(cudaGetDevice(&current_device));
-    STDGPU_CUDA_SAFE_CALL(cudaDeviceGetAttribute(&hash_concurrent_managed_access,
-                                                 cudaDevAttrConcurrentManagedAccess,
-                                                 current_device));
-    if (hash_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::cuda::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::cuda
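Aside: applications that still allocate managed memory themselves (outside stdgpu) can replicate the deleted guard directly against the CUDA runtime API. A minimal sketch under that assumption; the function name is hypothetical, and error checking is omitted where the original used STDGPU_CUDA_SAFE_CALL:

#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical stand-in for the removed workaround: on devices without
// concurrent managed access (e.g. pre-Pascal GPUs), the whole device must
// be synchronized before managed memory is touched on the host.
void synchronize_before_host_access()
{
    int device = 0;
    int concurrent_managed_access = 0;
    cudaGetDevice(&device);
    cudaDeviceGetAttribute(&concurrent_managed_access, cudaDevAttrConcurrentManagedAccess, device);
    if (concurrent_managed_access == 0)
    {
        std::printf("Synchronizing the whole GPU before host access ...\n");
        cudaDeviceSynchronize();
    }
}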
6 changes: 0 additions & 6 deletions src/stdgpu/cuda/memory.h
@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);

-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::cuda

 #endif // STDGPU_CUDA_MEMORY_H
39 changes: 3 additions & 36 deletions src/stdgpu/hip/impl/memory.cpp
@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;

-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     hipMemcpyKind kind;

-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = hipMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@
     STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }

-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on old GPUs
-    int current_device;
-    int has_concurrent_managed_access;
-    STDGPU_HIP_SAFE_CALL(hipGetDevice(&current_device));
-    STDGPU_HIP_SAFE_CALL(hipDeviceGetAttribute(&has_concurrent_managed_access,
-                                               hipDeviceAttributeConcurrentManagedAccess,
-                                               current_device));
-    if (has_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::hip::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_HIP_SAFE_CALL(hipDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::hip
6 changes: 0 additions & 6 deletions src/stdgpu/hip/memory.h
@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);

-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::hip

 #endif // STDGPU_HIP_MEMORY_H
29 changes: 2 additions & 27 deletions src/stdgpu/impl/memory.cpp
@@ -239,12 +239,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
             return manager_host;
         }

-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -255,12 +249,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
     }
 }

-void
-workaround_synchronize_managed_memory()
-{
-    stdgpu::STDGPU_BACKEND_NAMESPACE::workaround_synchronize_managed_memory();
-}
-
 [[nodiscard]] void*
 allocate(index64_t bytes, dynamic_memory_type type)
 {
@@ -310,15 +298,12 @@ memcpy(void* destination,
 {
     if (!external_memory)
     {
-        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed).contains_submemory(destination, bytes))
+        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes))
         {
             printf("stdgpu::detail::memcpy : Copying to unknown destination pointer not possible\n");
             return;
         }
-        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed)
-                 .contains_submemory(const_cast<void*>(source), bytes))
+        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes))
         {
             printf("stdgpu::detail::memcpy : Copying from unknown source pointer not possible\n");
             return;
@@ -345,12 +330,6 @@ dispatch_size_manager(const dynamic_memory_type type)
             return manager_host;
         }

-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -375,10 +354,6 @@ get_dynamic_memory_type(void* array)
     {
         return dynamic_memory_type::host;
     }
-    if (detail::dispatch_size_manager(dynamic_memory_type::managed).contains_memory(array))
-    {
-        return dynamic_memory_type::managed;
-    }

     return dynamic_memory_type::invalid;
 }
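Aside: after this hunk, get_dynamic_memory_type() can only report device, host, or invalid; a pointer from an external cudaMallocManaged call now falls through to invalid. A usage sketch, assuming the public createDeviceArray/createHostArray helpers and their signatures as shown for createHostArray later in this diff:

#include <cassert>
#include <stdgpu/memory.h>

void classification_example()
{
    float* d_array = createDeviceArray<float>(128, 0.0f);
    float* h_array = createHostArray<float>(128, 0.0f);

    // Registered allocations resolve to the pool they were allocated from
    assert(stdgpu::get_dynamic_memory_type(d_array) == stdgpu::dynamic_memory_type::device);
    assert(stdgpu::get_dynamic_memory_type(h_array) == stdgpu::dynamic_memory_type::host);

    destroyDeviceArray<float>(d_array);
    destroyHostArray<float>(h_array);
}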
100 changes: 0 additions & 100 deletions src/stdgpu/impl/memory_detail.h
@@ -124,9 +124,6 @@ unoptimized_destroy(ExecutionPolicy&& policy, Iterator first, Iterator last)
                          destroy_functor<Iterator>(first));
 }

-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::detail

 template <typename T>
@@ -179,61 +176,6 @@ createHostArray(const stdgpu::index64_t count, const T default_value)
     return host_array;
 }

-template <typename T>
-T*
-createManagedArray(const stdgpu::index64_t count, const T default_value, const Initialization initialize_on)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    T* managed_array = stdgpu::allocator_traits<Allocator>::allocate(managed_allocator, count);
-
-    if (managed_array == nullptr)
-    {
-        printf("createManagedArray : Failed to allocate array. Aborting ...\n");
-        return nullptr;
-    }
-
-    switch (initialize_on)
-    {
-#if STDGPU_DETAIL_IS_DEVICE_COMPILED
-        case Initialization::DEVICE:
-        {
-            stdgpu::uninitialized_fill(stdgpu::execution::device,
-                                       stdgpu::device_begin(managed_array),
-                                       stdgpu::device_end(managed_array),
-                                       default_value);
-        }
-        break;
-#else
-        case Initialization::DEVICE:
-        {
-            // Same as host path
-        }
-            [[fallthrough]];
-#endif
-
-        case Initialization::HOST:
-        {
-            stdgpu::detail::workaround_synchronize_managed_memory();
-
-            stdgpu::uninitialized_fill(stdgpu::execution::host,
-                                       stdgpu::host_begin(managed_array),
-                                       stdgpu::host_end(managed_array),
-                                       default_value);
-        }
-        break;
-
-        default:
-        {
-            printf("createManagedArray : Invalid initialization device. Returning created but uninitialized array "
-                   "...\n");
-        }
-    }
-
-    return managed_array;
-}
-
 template <typename T>
 void
 destroyDeviceArray(T*& device_array)
@@ -275,21 +217,6 @@ destroyHostArray(T*& host_array)
     host_array = nullptr;
 }

-template <typename T>
-void
-destroyManagedArray(T*& managed_array)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    // Call on host since the initialization place is not known
-    stdgpu::allocator_traits<Allocator>::deallocate_filled(stdgpu::execution::host,
-                                                           managed_allocator,
-                                                           managed_array,
-                                                           stdgpu::size(managed_array));
-    managed_array = nullptr;
-}
-
 template <typename T>
 T*
 copyCreateDevice2HostArray(const T* device_array, const stdgpu::index64_t count, const MemoryCopy check_safety)
@@ -565,33 +492,6 @@ safe_host_allocator<T>::deallocate(T* p, index64_t n)
                        memory_type);
 }

-template <typename T>
-template <typename U>
-safe_managed_allocator<T>::safe_managed_allocator([[maybe_unused]] const safe_managed_allocator<U>& other) noexcept
-{
-}
-
-template <typename T>
-[[nodiscard]] T*
-safe_managed_allocator<T>::allocate(index64_t n)
-{
-    T* p = static_cast<T*>(
-        detail::allocate(n * static_cast<index64_t>(sizeof(T)), memory_type)); // NOLINT(bugprone-sizeof-expression)
-    register_memory(p, n, memory_type);
-    return p;
-}
-
-template <typename T>
-void
-safe_managed_allocator<T>::deallocate(T* p, index64_t n)
-{
-    deregister_memory(p, n, memory_type);
-    // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
-    detail::deallocate(static_cast<void*>(const_cast<std::remove_cv_t<T>*>(p)),
-                       n * static_cast<index64_t>(sizeof(T)), // NOLINT(bugprone-sizeof-expression)
-                       memory_type);
-}
-
 template <typename Allocator>
 typename allocator_traits<Allocator>::pointer
 allocator_traits<Allocator>::allocate(Allocator& a, typename allocator_traits<Allocator>::index_type n)
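Migration note: callers of the removed createManagedArray/destroyManagedArray pair can switch to an explicit device array plus an explicit copy for host access. A minimal sketch using createDeviceArray together with copyCreateDevice2HostArray, destroyHostArray, and destroyDeviceArray, all visible elsewhere in this diff; counts and values are illustrative:

#include <stdgpu/memory.h>

void migrate_from_managed()
{
    // Previously: float* values = createManagedArray<float>(1000, 42.0f, Initialization::DEVICE);
    float* d_values = createDeviceArray<float>(1000, 42.0f);

    // Host access is now an explicit copy instead of transparent managed paging
    float* h_values = copyCreateDevice2HostArray<float>(d_values, 1000);

    // ... use h_values on the host ...

    destroyHostArray<float>(h_values);
    destroyDeviceArray<float>(d_values);
}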