memory: Drop support for managed memory
stotko committed Nov 19, 2024
1 parent 9005240 commit 535dae5
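
This removes the public managed-memory API (createManagedArray, destroyManagedArray, safe_managed_allocator) together with the backend plumbing shown below. A minimal migration sketch for downstream code, assuming the surviving createDeviceArray and copyCreateDevice2HostArray entry points mirror the createHostArray signature visible in this diff; names and sizes are illustrative:

#include <stdgpu/memory.h>

// Before this commit, a single managed array could be touched from both sides:
//   float* data = createManagedArray<float>(n, 1.0f, Initialization::DEVICE);
//   ...
//   destroyManagedArray<float>(data);
// Afterwards, host and device storage stay separate and copies are explicit.
void
migrate_example()
{
    const stdgpu::index64_t n = 1024;

    float* d_data = createDeviceArray<float>(n, 1.0f); // device-side storage
    // ... run kernels that write to d_data ...

    float* h_data = copyCreateDevice2HostArray<float>(d_data, n); // explicit copy for host access
    // ... read h_data on the host ...

    destroyHostArray<float>(h_data);
    destroyDeviceArray<float>(d_data);
}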
Showing 12 changed files with 13 additions and 583 deletions.
7 changes: 0 additions & 7 deletions benchmarks/stdgpu/main.cpp
@@ -65,13 +65,6 @@ main(int argc, char* argv[])
            stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host),
            stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
                    stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host));
-    printf("| Managed %6" STDGPU_PRIINDEX64 " / %6" STDGPU_PRIINDEX64 " (%6" STDGPU_PRIINDEX64
-           ") |\n",
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed) -
-                   stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed));
     printf("+---------------------------------------------------------+\n");
 
     return EXIT_SUCCESS;
 }
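
The benchmark summary now tracks only device and host allocations. A small sketch of the remaining counter pattern, using the same accessors as the retained lines above; the helper name is illustrative:

#include <cstdio>
#include <stdgpu/memory.h>

// Print the number of outstanding allocations for one memory type;
// dynamic_memory_type::managed is no longer a valid argument here.
void
print_outstanding(const stdgpu::dynamic_memory_type type, const char* label)
{
    const stdgpu::index64_t outstanding =
            stdgpu::get_allocation_count(type) - stdgpu::get_deallocation_count(type);
    printf("%s : %" STDGPU_PRIINDEX64 " outstanding allocation(s)\n", label, outstanding);
}

// Usage:
//   print_outstanding(stdgpu::dynamic_memory_type::device, "Device");
//   print_outstanding(stdgpu::dynamic_memory_type::host, "Host");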
39 changes: 3 additions & 36 deletions src/stdgpu/cuda/impl/memory.cpp
@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     cudaMemcpyKind kind;
 
-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = cudaMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@ memcpy(void* destination,
     STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }
 
-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on pre-Pascal GPUs
-    int current_device;
-    int hash_concurrent_managed_access;
-    STDGPU_CUDA_SAFE_CALL(cudaGetDevice(&current_device));
-    STDGPU_CUDA_SAFE_CALL(cudaDeviceGetAttribute(&hash_concurrent_managed_access,
-                                                 cudaDevAttrConcurrentManagedAccess,
-                                                 current_device));
-    if (hash_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::cuda::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::cuda
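
After the simplification, only the three host/device combinations select a copy kind; anything involving the removed managed type falls through to the error branch. A usage sketch of these internal backend entry points, assuming the elided middle parameters of memcpy are source and bytes, as used in the implementation above:

#include <vector>
#include <stdgpu/cuda/memory.h>

// Upload a host buffer into device memory via the backend malloc/memcpy/free.
// The copy below resolves to cudaMemcpyHostToDevice in the simplified dispatch.
void
upload(const std::vector<float>& host_buffer)
{
    const stdgpu::index64_t bytes = static_cast<stdgpu::index64_t>(host_buffer.size() * sizeof(float));

    void* device_buffer = nullptr;
    stdgpu::cuda::malloc(stdgpu::dynamic_memory_type::device, &device_buffer, bytes);

    stdgpu::cuda::memcpy(device_buffer,
                         host_buffer.data(),
                         bytes,
                         stdgpu::dynamic_memory_type::device,
                         stdgpu::dynamic_memory_type::host);

    stdgpu::cuda::free(stdgpu::dynamic_memory_type::device, device_buffer);
}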
6 changes: 0 additions & 6 deletions src/stdgpu/cuda/memory.h
@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);
 
-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::cuda
 
 #endif // STDGPU_CUDA_MEMORY_H
39 changes: 3 additions & 36 deletions src/stdgpu/hip/impl/memory.cpp
@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     hipMemcpyKind kind;
 
-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = hipMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@ memcpy(void* destination,
     STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }
 
-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on old GPUs
-    int current_device;
-    int has_concurrent_managed_access;
-    STDGPU_HIP_SAFE_CALL(hipGetDevice(&current_device));
-    STDGPU_HIP_SAFE_CALL(hipDeviceGetAttribute(&has_concurrent_managed_access,
-                                               hipDeviceAttributeConcurrentManagedAccess,
-                                               current_device));
-    if (has_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::hip::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_HIP_SAFE_CALL(hipDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::hip
6 changes: 0 additions & 6 deletions src/stdgpu/hip/memory.h
@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);
 
-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::hip
 
 #endif // STDGPU_HIP_MEMORY_H
29 changes: 2 additions & 27 deletions src/stdgpu/impl/memory.cpp
@@ -239,12 +239,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
             return manager_host;
         }
 
-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -255,12 +249,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
     }
 }
 
-void
-workaround_synchronize_managed_memory()
-{
-    stdgpu::STDGPU_BACKEND_NAMESPACE::workaround_synchronize_managed_memory();
-}
-
 [[nodiscard]] void*
 allocate(index64_t bytes, dynamic_memory_type type)
 {
@@ -310,15 +298,12 @@ memcpy(void* destination,
 {
     if (!external_memory)
     {
-        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed).contains_submemory(destination, bytes))
+        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes))
         {
             printf("stdgpu::detail::memcpy : Copying to unknown destination pointer not possible\n");
             return;
         }
-        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed)
-                    .contains_submemory(const_cast<void*>(source), bytes))
+        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes))
         {
             printf("stdgpu::detail::memcpy : Copying from unknown source pointer not possible\n");
             return;
@@ -345,12 +330,6 @@ dispatch_size_manager(const dynamic_memory_type type)
             return manager_host;
         }
 
-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -375,10 +354,6 @@ get_dynamic_memory_type(void* array)
     {
         return dynamic_memory_type::host;
     }
-    if (detail::dispatch_size_manager(dynamic_memory_type::managed).contains_memory(array))
-    {
-        return dynamic_memory_type::managed;
-    }
 
     return dynamic_memory_type::invalid;
 }
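
With the managed branch gone, get_dynamic_memory_type can only report device, host, or invalid. A behavior sketch, again assuming createDeviceArray matches the createHostArray signature from this diff:

#include <stdgpu/memory.h>

// Every tracked pointer now classifies as device or host; untracked pointers
// (e.g. stack addresses) report invalid.
void
classify_example()
{
    float* d_array = createDeviceArray<float>(64, 0.0f);
    float* h_array = createHostArray<float>(64, 0.0f);
    int untracked = 0;

    const stdgpu::dynamic_memory_type t_device = stdgpu::get_dynamic_memory_type(d_array);   // device
    const stdgpu::dynamic_memory_type t_host = stdgpu::get_dynamic_memory_type(h_array);     // host
    const stdgpu::dynamic_memory_type t_other = stdgpu::get_dynamic_memory_type(&untracked); // invalid

    (void)t_device;
    (void)t_host;
    (void)t_other;

    destroyHostArray<float>(h_array);
    destroyDeviceArray<float>(d_array);
}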
100 changes: 0 additions & 100 deletions src/stdgpu/impl/memory_detail.h
@@ -124,9 +124,6 @@ unoptimized_destroy(ExecutionPolicy&& policy, Iterator first, Iterator last)
                            destroy_functor<Iterator>(first));
 }
 
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::detail
 
 template <typename T>
@@ -179,61 +176,6 @@ createHostArray(const stdgpu::index64_t count, const T default_value)
     return host_array;
 }
 
-template <typename T>
-T*
-createManagedArray(const stdgpu::index64_t count, const T default_value, const Initialization initialize_on)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    T* managed_array = stdgpu::allocator_traits<Allocator>::allocate(managed_allocator, count);
-
-    if (managed_array == nullptr)
-    {
-        printf("createManagedArray : Failed to allocate array. Aborting ...\n");
-        return nullptr;
-    }
-
-    switch (initialize_on)
-    {
-#if STDGPU_DETAIL_IS_DEVICE_COMPILED
-        case Initialization::DEVICE:
-        {
-            stdgpu::uninitialized_fill(stdgpu::execution::device,
-                                       stdgpu::device_begin(managed_array),
-                                       stdgpu::device_end(managed_array),
-                                       default_value);
-        }
-        break;
-#else
-        case Initialization::DEVICE:
-        {
-            // Same as host path
-        }
-        [[fallthrough]];
-#endif
-
-        case Initialization::HOST:
-        {
-            stdgpu::detail::workaround_synchronize_managed_memory();
-
-            stdgpu::uninitialized_fill(stdgpu::execution::host,
-                                       stdgpu::host_begin(managed_array),
-                                       stdgpu::host_end(managed_array),
-                                       default_value);
-        }
-        break;
-
-        default:
-        {
-            printf("createManagedArray : Invalid initialization device. Returning created but uninitialized array "
-                   "...\n");
-        }
-    }
-
-    return managed_array;
-}
-
 template <typename T>
 void
 destroyDeviceArray(T*& device_array)
@@ -275,21 +217,6 @@ destroyHostArray(T*& host_array)
     host_array = nullptr;
 }
 
-template <typename T>
-void
-destroyManagedArray(T*& managed_array)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    // Call on host since the initialization place is not known
-    stdgpu::allocator_traits<Allocator>::deallocate_filled(stdgpu::execution::host,
-                                                           managed_allocator,
-                                                           managed_array,
-                                                           stdgpu::size(managed_array));
-    managed_array = nullptr;
-}
-
 template <typename T>
 T*
 copyCreateDevice2HostArray(const T* device_array, const stdgpu::index64_t count, const MemoryCopy check_safety)
@@ -565,33 +492,6 @@ safe_host_allocator<T>::deallocate(T* p, index64_t n)
                        memory_type);
 }
 
-template <typename T>
-template <typename U>
-safe_managed_allocator<T>::safe_managed_allocator([[maybe_unused]] const safe_managed_allocator<U>& other) noexcept
-{
-}
-
-template <typename T>
-[[nodiscard]] T*
-safe_managed_allocator<T>::allocate(index64_t n)
-{
-    T* p = static_cast<T*>(
-            detail::allocate(n * static_cast<index64_t>(sizeof(T)), memory_type)); // NOLINT(bugprone-sizeof-expression)
-    register_memory(p, n, memory_type);
-    return p;
-}
-
-template <typename T>
-void
-safe_managed_allocator<T>::deallocate(T* p, index64_t n)
-{
-    deregister_memory(p, n, memory_type);
-    // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
-    detail::deallocate(static_cast<void*>(const_cast<std::remove_cv_t<T>*>(p)),
-                       n * static_cast<index64_t>(sizeof(T)), // NOLINT(bugprone-sizeof-expression)
-                       memory_type);
-}
-
 template <typename Allocator>
 typename allocator_traits<Allocator>::pointer
 allocator_traits<Allocator>::allocate(Allocator& a, typename allocator_traits<Allocator>::index_type n)
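
With safe_managed_allocator deleted, safe_device_allocator and safe_host_allocator remain as the allocator front ends. A sketch of the surviving path through allocator_traits, mirroring the allocate call removed from createManagedArray; the plain deallocate overload is assumed to be the counterpart of the deallocate_filled call shown above:

#include <stdgpu/memory.h>

// Round-trip a raw host allocation through the remaining allocator machinery;
// safe_host_allocator stands in where safe_managed_allocator was used before.
void
host_allocator_roundtrip(const stdgpu::index64_t n)
{
    using Allocator = stdgpu::safe_host_allocator<float>;
    Allocator host_allocator;

    float* p = stdgpu::allocator_traits<Allocator>::allocate(host_allocator, n);
    if (p == nullptr)
    {
        return; // allocation failed
    }

    // ... construct and use the elements on the host ...

    stdgpu::allocator_traits<Allocator>::deallocate(host_allocator, p, n);
}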