From 535dae56176f42848f459a8625e01941a2548535 Mon Sep 17 00:00:00 2001
From: Patrick Stotko <stotko@cs.uni-bonn.de>
Date: Tue, 19 Nov 2024 08:22:51 +0100
Subject: [PATCH] memory: Drop support for managed memory

---
 benchmarks/stdgpu/main.cpp        |   7 -
 src/stdgpu/cuda/impl/memory.cpp   |  39 +-----
 src/stdgpu/cuda/memory.h          |   6 -
 src/stdgpu/hip/impl/memory.cpp    |  39 +-----
 src/stdgpu/hip/memory.h           |   6 -
 src/stdgpu/impl/memory.cpp        |  29 +---
 src/stdgpu/impl/memory_detail.h   | 100 --------------
 src/stdgpu/memory.h               | 130 +-----------------
 src/stdgpu/openmp/impl/memory.cpp |   8 --
 src/stdgpu/openmp/memory.h        |   6 -
 tests/stdgpu/main.cpp             |   7 -
 tests/stdgpu/memory.inc           | 219 ------------------------------
 12 files changed, 13 insertions(+), 583 deletions(-)

diff --git a/benchmarks/stdgpu/main.cpp b/benchmarks/stdgpu/main.cpp
index cb414a5c4..7d913072f 100644
--- a/benchmarks/stdgpu/main.cpp
+++ b/benchmarks/stdgpu/main.cpp
@@ -65,13 +65,6 @@ main(int argc, char* argv[])
            stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host),
            stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
                    stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host));
-    printf("|   Managed    %6" STDGPU_PRIINDEX64 " / %6" STDGPU_PRIINDEX64 " (%6" STDGPU_PRIINDEX64
-           ")                   |\n",
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed) -
-                   stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed));
-    printf("+---------------------------------------------------------+\n");
 
     return EXIT_SUCCESS;
 }
diff --git a/src/stdgpu/cuda/impl/memory.cpp b/src/stdgpu/cuda/impl/memory.cpp
index 1f82270a2..019e58535 100644
--- a/src/stdgpu/cuda/impl/memory.cpp
+++ b/src/stdgpu/cuda/impl/memory.cpp
@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_CUDA_SAFE_CALL(cudaFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     cudaMemcpyKind kind;
 
-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = cudaMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = cudaMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@ memcpy(void* destination,
     STDGPU_CUDA_SAFE_CALL(cudaMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }
 
-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on pre-Pascal GPUs
-    int current_device;
-    int hash_concurrent_managed_access;
-    STDGPU_CUDA_SAFE_CALL(cudaGetDevice(&current_device));
-    STDGPU_CUDA_SAFE_CALL(cudaDeviceGetAttribute(&hash_concurrent_managed_access,
-                                                 cudaDevAttrConcurrentManagedAccess,
-                                                 current_device));
-    if (hash_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::cuda::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::cuda
diff --git a/src/stdgpu/cuda/memory.h b/src/stdgpu/cuda/memory.h
index de49a4b4d..72c97ffb9 100644
--- a/src/stdgpu/cuda/memory.h
+++ b/src/stdgpu/cuda/memory.h
@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);
 
-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::cuda
 
 #endif // STDGPU_CUDA_MEMORY_H
diff --git a/src/stdgpu/hip/impl/memory.cpp b/src/stdgpu/hip/impl/memory.cpp
index cf17fc9cf..55a9aa557 100644
--- a/src/stdgpu/hip/impl/memory.cpp
+++ b/src/stdgpu/hip/impl/memory.cpp
@@ -39,12 +39,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipMallocManaged(array, static_cast<std::size_t>(bytes)));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -71,12 +65,6 @@ free(const dynamic_memory_type type, void* array)
         }
         break;
 
-        case dynamic_memory_type::managed:
-        {
-            STDGPU_HIP_SAFE_CALL(hipFree(array));
-        }
-        break;
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -95,18 +83,15 @@ memcpy(void* destination,
 {
     hipMemcpyKind kind;
 
-    if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-        (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToDevice;
     }
-    else if ((destination_type == dynamic_memory_type::device || destination_type == dynamic_memory_type::managed) &&
-             source_type == dynamic_memory_type::host)
+    else if (destination_type == dynamic_memory_type::device && source_type == dynamic_memory_type::host)
     {
         kind = hipMemcpyHostToDevice;
     }
-    else if (destination_type == dynamic_memory_type::host &&
-             (source_type == dynamic_memory_type::device || source_type == dynamic_memory_type::managed))
+    else if (destination_type == dynamic_memory_type::host && source_type == dynamic_memory_type::device)
     {
         kind = hipMemcpyDeviceToHost;
     }
@@ -123,22 +108,4 @@ memcpy(void* destination,
     STDGPU_HIP_SAFE_CALL(hipMemcpy(destination, source, static_cast<std::size_t>(bytes), kind));
 }
 
-void
-workaround_synchronize_managed_memory()
-{
-    // We need to synchronize the whole device before accessing managed memory on old GPUs
-    int current_device;
-    int has_concurrent_managed_access;
-    STDGPU_HIP_SAFE_CALL(hipGetDevice(&current_device));
-    STDGPU_HIP_SAFE_CALL(hipDeviceGetAttribute(&has_concurrent_managed_access,
-                                               hipDeviceAttributeConcurrentManagedAccess,
-                                               current_device));
-    if (has_concurrent_managed_access == 0)
-    {
-        printf("stdgpu::hip::workaround_synchronize_managed_memory : Synchronizing the whole GPU in order to access "
-               "the data on the host ...\n");
-        STDGPU_HIP_SAFE_CALL(hipDeviceSynchronize());
-    }
-}
-
 } // namespace stdgpu::hip
diff --git a/src/stdgpu/hip/memory.h b/src/stdgpu/hip/memory.h
index d74fa31b8..c4b45394b 100644
--- a/src/stdgpu/hip/memory.h
+++ b/src/stdgpu/hip/memory.h
@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);
 
-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::hip
 
 #endif // STDGPU_HIP_MEMORY_H
diff --git a/src/stdgpu/impl/memory.cpp b/src/stdgpu/impl/memory.cpp
index 045055f8d..10864ba3b 100644
--- a/src/stdgpu/impl/memory.cpp
+++ b/src/stdgpu/impl/memory.cpp
@@ -239,12 +239,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
             return manager_host;
         }
 
-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -255,12 +249,6 @@ dispatch_allocation_manager(const dynamic_memory_type type)
     }
 }
 
-void
-workaround_synchronize_managed_memory()
-{
-    stdgpu::STDGPU_BACKEND_NAMESPACE::workaround_synchronize_managed_memory();
-}
-
 [[nodiscard]] void*
 allocate(index64_t bytes, dynamic_memory_type type)
 {
@@ -310,15 +298,12 @@ memcpy(void* destination,
 {
     if (!external_memory)
     {
-        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed).contains_submemory(destination, bytes))
+        if (!dispatch_allocation_manager(destination_type).contains_submemory(destination, bytes))
         {
             printf("stdgpu::detail::memcpy : Copying to unknown destination pointer not possible\n");
             return;
         }
-        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes) &&
-            !dispatch_allocation_manager(dynamic_memory_type::managed)
-                     .contains_submemory(const_cast<void*>(source), bytes))
+        if (!dispatch_allocation_manager(source_type).contains_submemory(const_cast<void*>(source), bytes))
         {
             printf("stdgpu::detail::memcpy : Copying from unknown source pointer not possible\n");
             return;
@@ -345,12 +330,6 @@ dispatch_size_manager(const dynamic_memory_type type)
             return manager_host;
         }
 
-        case dynamic_memory_type::managed:
-        {
-            static memory_manager manager_managed;
-            return manager_managed;
-        }
-
         case dynamic_memory_type::invalid:
         default:
         {
@@ -375,10 +354,6 @@ get_dynamic_memory_type(void* array)
     {
         return dynamic_memory_type::host;
     }
-    if (detail::dispatch_size_manager(dynamic_memory_type::managed).contains_memory(array))
-    {
-        return dynamic_memory_type::managed;
-    }
 
     return dynamic_memory_type::invalid;
 }
diff --git a/src/stdgpu/impl/memory_detail.h b/src/stdgpu/impl/memory_detail.h
index d2f7b6723..f98f4b7db 100644
--- a/src/stdgpu/impl/memory_detail.h
+++ b/src/stdgpu/impl/memory_detail.h
@@ -124,9 +124,6 @@ unoptimized_destroy(ExecutionPolicy&& policy, Iterator first, Iterator last)
                    destroy_functor<Iterator>(first));
 }
 
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::detail
 
 template <typename T>
@@ -179,61 +176,6 @@ createHostArray(const stdgpu::index64_t count, const T default_value)
     return host_array;
 }
 
-template <typename T>
-T*
-createManagedArray(const stdgpu::index64_t count, const T default_value, const Initialization initialize_on)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    T* managed_array = stdgpu::allocator_traits<Allocator>::allocate(managed_allocator, count);
-
-    if (managed_array == nullptr)
-    {
-        printf("createManagedArray : Failed to allocate array. Aborting ...\n");
-        return nullptr;
-    }
-
-    switch (initialize_on)
-    {
-#if STDGPU_DETAIL_IS_DEVICE_COMPILED
-        case Initialization::DEVICE:
-        {
-            stdgpu::uninitialized_fill(stdgpu::execution::device,
-                                       stdgpu::device_begin(managed_array),
-                                       stdgpu::device_end(managed_array),
-                                       default_value);
-        }
-        break;
-#else
-        case Initialization::DEVICE:
-        {
-            // Same as host path
-        }
-            [[fallthrough]];
-#endif
-
-        case Initialization::HOST:
-        {
-            stdgpu::detail::workaround_synchronize_managed_memory();
-
-            stdgpu::uninitialized_fill(stdgpu::execution::host,
-                                       stdgpu::host_begin(managed_array),
-                                       stdgpu::host_end(managed_array),
-                                       default_value);
-        }
-        break;
-
-        default:
-        {
-            printf("createManagedArray : Invalid initialization device. Returning created but uninitialized array "
-                   "...\n");
-        }
-    }
-
-    return managed_array;
-}
-
 template <typename T>
 void
 destroyDeviceArray(T*& device_array)
@@ -275,21 +217,6 @@ destroyHostArray(T*& host_array)
     host_array = nullptr;
 }
 
-template <typename T>
-void
-destroyManagedArray(T*& managed_array)
-{
-    using Allocator = stdgpu::safe_managed_allocator<T>;
-    Allocator managed_allocator;
-
-    // Call on host since the initialization place is not known
-    stdgpu::allocator_traits<Allocator>::deallocate_filled(stdgpu::execution::host,
-                                                           managed_allocator,
-                                                           managed_array,
-                                                           stdgpu::size(managed_array));
-    managed_array = nullptr;
-}
-
 template <typename T>
 T*
 copyCreateDevice2HostArray(const T* device_array, const stdgpu::index64_t count, const MemoryCopy check_safety)
@@ -565,33 +492,6 @@ safe_host_allocator<T>::deallocate(T* p, index64_t n)
                        memory_type);
 }
 
-template <typename T>
-template <typename U>
-safe_managed_allocator<T>::safe_managed_allocator([[maybe_unused]] const safe_managed_allocator<U>& other) noexcept
-{
-}
-
-template <typename T>
-[[nodiscard]] T*
-safe_managed_allocator<T>::allocate(index64_t n)
-{
-    T* p = static_cast<T*>(
-            detail::allocate(n * static_cast<index64_t>(sizeof(T)), memory_type)); // NOLINT(bugprone-sizeof-expression)
-    register_memory(p, n, memory_type);
-    return p;
-}
-
-template <typename T>
-void
-safe_managed_allocator<T>::deallocate(T* p, index64_t n)
-{
-    deregister_memory(p, n, memory_type);
-    // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
-    detail::deallocate(static_cast<void*>(const_cast<std::remove_cv_t<T>*>(p)),
-                       n * static_cast<index64_t>(sizeof(T)), // NOLINT(bugprone-sizeof-expression)
-                       memory_type);
-}
-
 template <typename Allocator>
 typename allocator_traits<Allocator>::pointer
 allocator_traits<Allocator>::allocate(Allocator& a, typename allocator_traits<Allocator>::index_type n)
diff --git a/src/stdgpu/memory.h b/src/stdgpu/memory.h
index 8f8260eba..6f60c0268 100644
--- a/src/stdgpu/memory.h
+++ b/src/stdgpu/memory.h
@@ -36,16 +36,6 @@
 #include <stdgpu/platform.h>
 #include <stdgpu/type_traits.h>
 
-/**
- * \ingroup memory
- * \brief The place to initialize the created array
- */
-enum class Initialization : std::int8_t
-{
-    HOST,  /**< The array is initialized on the host (CPU) */
-    DEVICE /**< The array is initialized on the device (GPU) */
-};
-
 /**
  * \ingroup memory
  * \brief Creates a new device array and initializes (fills) it with the given default value
@@ -72,22 +62,6 @@ template <typename T>
 [[nodiscard]] T*
 createHostArray(const stdgpu::index64_t count, const T default_value = T());
 
-/**
- * \ingroup memory
- * \brief Creates a new managed array and initializes (fills) it with the given default value
- * \tparam T The type of the array
- * \param[in] count The number of elements of the new array
- * \param[in] default_value A default value, that should be stored in every array entry
- * \param[in] initialize_on The device on which the fill operation is performed
- * \return The allocated managed array if count > 0, nullptr otherwise
- * \post get_dynamic_memory_type(result) == dynamic_memory_type::managed if count > 0
- */
-template <typename T>
-[[nodiscard]] T*
-createManagedArray(const stdgpu::index64_t count,
-                   const T default_value = T(),
-                   const Initialization initialize_on = Initialization::DEVICE);
-
 /**
  * \ingroup memory
  * \brief Destroys the given device array
@@ -108,16 +82,6 @@ template <typename T>
 void
 destroyHostArray(T*& host_array);
 
-/**
- * \ingroup memory
- * \brief Destroys the given managed array
- * \tparam T The type of the array
- * \param[in] managed_array A managed array
- */
-template <typename T>
-void
-destroyManagedArray(T*& managed_array);
-
 /**
  * \ingroup memory
  * \brief The copy check states
@@ -137,7 +101,6 @@ enum class MemoryCopy : std::int8_t
  * \param[in] count The number of elements of device_array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
  * \return The same array allocated on the host
- * \note The source array might also be a managed array
  */
 template <typename T>
 [[nodiscard]] T*
@@ -153,7 +116,6 @@ copyCreateDevice2HostArray(const T* device_array,
  * \param[in] count The number of elements of host_array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
  * \return The same array allocated on the device
- * \note The source array might also be a managed array
  */
 template <typename T>
 [[nodiscard]] T*
@@ -169,7 +131,6 @@ copyCreateHost2DeviceArray(const T* host_array,
  * \param[in] count The number of elements of host_array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
  * \return The same array allocated on the host
- * \note The source array might also be a managed array
  */
 template <typename T>
 [[nodiscard]] T*
@@ -185,7 +146,6 @@ copyCreateHost2HostArray(const T* host_array,
  * \param[in] count The number of elements of device_array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
  * \return The same array allocated on the device
- * \note The source array might also be a managed array
  */
 template <typename T>
 [[nodiscard]] T*
@@ -201,7 +161,6 @@ copyCreateDevice2DeviceArray(const T* device_array,
  * \param[in] count The number of elements of source_device_array
  * \param[out] destination_host_array The host array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
- * \note The source and destination arrays might also be managed arrays
  */
 template <typename T>
 void
@@ -218,7 +177,6 @@ copyDevice2HostArray(const T* source_device_array,
  * \param[in] count The number of elements of source_host_array
  * \param[out] destination_device_array The device array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
- * \note The source and destination arrays might also be managed arrays
  */
 template <typename T>
 void
@@ -235,7 +193,6 @@ copyHost2DeviceArray(const T* source_host_array,
  * \param[in] count The number of elements of source_host_array
  * \param[out] destination_host_array The host array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
- * \note The source and destination arrays might also be managed arrays
  */
 template <typename T>
 void
@@ -252,7 +209,6 @@ copyHost2HostArray(const T* source_host_array,
  * \param[in] count The number of elements of source_device_array
  * \param[out] destination_device_array The device array
  * \param[in] check_safety True if this function should check whether copying is safe, false otherwise
- * \note The source and destination arrays might also be managed arrays
  */
 template <typename T>
 void
@@ -360,11 +316,9 @@ class device_unique_object
  */
 enum class dynamic_memory_type : std::int8_t
 {
-    host,    /**< The array is allocated on the host (CPU) */
-    device,  /**< The array is allocated on the device (GPU) */
-    managed, /**< The array is allocated on both the host (CPU) and device (GPU) and managed internally by the driver
-                via paging */
-    invalid  /**< The array is not registered by our API */
+    host,   /**< The array is allocated on the host (CPU) */
+    device, /**< The array is allocated on the device (GPU) */
+    invalid /**< The array is not registered by our API */
 };
 
 /**
@@ -525,80 +479,6 @@ struct safe_host_allocator
     deallocate(T* p, index64_t n);
 };
 
-/**
- * \ingroup memory
- * \brief An allocator for managed memory
- * \tparam T A type
- */
-template <typename T>
-struct safe_managed_allocator
-{
-    using value_type = T; /**< T */
-
-    /**
-     * \brief Dynamic memory type of allocations
-     */
-    constexpr static dynamic_memory_type memory_type = dynamic_memory_type::managed;
-
-    /**
-     * \brief Default constructor
-     */
-    safe_managed_allocator() noexcept = default;
-
-    /**
-     * \brief Default destructor
-     */
-    ~safe_managed_allocator() noexcept = default;
-
-    /**
-     * \brief Copy constructor
-     */
-    safe_managed_allocator(const safe_managed_allocator&) noexcept = default;
-
-    /**
-     * \brief Copy constructor
-     * \tparam U Another type
-     * \param[in] other The allocator to be copied from
-     */
-    template <typename U>
-    explicit safe_managed_allocator(const safe_managed_allocator<U>& other) noexcept;
-
-    /**
-     * \brief Copy assignment operator
-     * \return *this
-     */
-    safe_managed_allocator&
-    operator=(const safe_managed_allocator&) noexcept = default;
-
-    /**
-     * \brief Move constructor
-     */
-    safe_managed_allocator(safe_managed_allocator&&) noexcept = default;
-
-    /**
-     * \brief Move assignment operator
-     * \return *this
-     */
-    safe_managed_allocator&
-    operator=(safe_managed_allocator&&) noexcept = default;
-
-    /**
-     * \brief Allocates a memory block of the given size
-     * \param[in] n The number of allocated elements
-     * \return A pointer to the allocated memory block
-     */
-    [[nodiscard]] T*
-    allocate(index64_t n);
-
-    /**
-     * \brief Deallocates the given memory block
-     * \param[in] p A pointer to the memory block
-     * \param[in] n The number of allocated elements (must match the size during allocation)
-     */
-    void
-    deallocate(T* p, index64_t n);
-};
-
 /**
  * \ingroup memory
  * \brief A general allocator traitor
@@ -946,7 +826,7 @@ using namespace adl_barrier;
  * \param[in] p A pointer to the memory block
  * \param[in] n The size of the memory block in bytes
  * \param[in] memory_type The dynamic memory type of the memory block
- * \note Automatically called by safe_device_allocator, safe_host_allocator, safe_managed_allocator
+ * \note Automatically called by safe_device_allocator, safe_host_allocator
  */
 template <typename T>
 void
@@ -958,7 +838,7 @@ register_memory(T* p, index64_t n, dynamic_memory_type memory_type);
  * \param[in] p A pointer to the memory block
  * \param[in] n The size of the memory block in bytes (must match the size during registration)
  * \param[in] memory_type The dynamic memory type of the memory block
- * \note Automatically called by safe_device_allocator, safe_host_allocator, safe_managed_allocator
+ * \note Automatically called by safe_device_allocator, safe_host_allocator
  * \note Only thread-safe if called before the memory block is actually freed
  */
 template <typename T>
diff --git a/src/stdgpu/openmp/impl/memory.cpp b/src/stdgpu/openmp/impl/memory.cpp
index 6789b4257..25471e8bb 100644
--- a/src/stdgpu/openmp/impl/memory.cpp
+++ b/src/stdgpu/openmp/impl/memory.cpp
@@ -29,7 +29,6 @@ malloc(const dynamic_memory_type type, void** array, index64_t bytes)
     {
         case dynamic_memory_type::device:
         case dynamic_memory_type::host:
-        case dynamic_memory_type::managed:
         {
             *array =
                     std::malloc(static_cast<std::size_t>(bytes)); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc)
@@ -52,7 +51,6 @@ free(const dynamic_memory_type type, void* array)
     {
         case dynamic_memory_type::device:
         case dynamic_memory_type::host:
-        case dynamic_memory_type::managed:
         {
             std::free(array); // NOLINT(hicpp-no-malloc,cppcoreguidelines-no-malloc)
         }
@@ -83,10 +81,4 @@ memcpy(void* destination,
     std::memcpy(destination, source, static_cast<std::size_t>(bytes));
 }
 
-void
-workaround_synchronize_managed_memory()
-{
-    // No synchronization workaround required for OpenMP backend
-}
-
 } // namespace stdgpu::openmp
diff --git a/src/stdgpu/openmp/memory.h b/src/stdgpu/openmp/memory.h
index cd1b0aa47..0019cac25 100644
--- a/src/stdgpu/openmp/memory.h
+++ b/src/stdgpu/openmp/memory.h
@@ -54,12 +54,6 @@ memcpy(void* destination,
        dynamic_memory_type destination_type,
        dynamic_memory_type source_type);
 
-/**
- * \brief Workarounds a synchronization issue with older GPUs
- */
-void
-workaround_synchronize_managed_memory();
-
 } // namespace stdgpu::openmp
 
 #endif // STDGPU_OPENMP_MEMORY_H
diff --git a/tests/stdgpu/main.cpp b/tests/stdgpu/main.cpp
index bc659744c..04e363ec0 100644
--- a/tests/stdgpu/main.cpp
+++ b/tests/stdgpu/main.cpp
@@ -66,13 +66,6 @@ main(int argc, char* argv[])
            stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host),
            stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::host) -
                    stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::host));
-    printf("|   Managed    %6" STDGPU_PRIINDEX64 " / %6" STDGPU_PRIINDEX64 " (%6" STDGPU_PRIINDEX64
-           ")                   |\n",
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed),
-           stdgpu::get_allocation_count(stdgpu::dynamic_memory_type::managed) -
-                   stdgpu::get_deallocation_count(stdgpu::dynamic_memory_type::managed));
-    printf("+---------------------------------------------------------+\n");
 
     return result;
 }
diff --git a/tests/stdgpu/memory.inc b/tests/stdgpu/memory.inc
index 5bc0d596c..c7fffca76 100644
--- a/tests/stdgpu/memory.inc
+++ b/tests/stdgpu/memory.inc
@@ -57,18 +57,12 @@ createDeviceArray<int>(const stdgpu::index64_t, const int);
 template int*
 createHostArray<int>(const stdgpu::index64_t, const int);
 
-template int*
-createManagedArray<int>(const stdgpu::index64_t, const int, const Initialization);
-
 template void
 destroyDeviceArray<int>(int*&);
 
 template void
 destroyHostArray<int>(int*&);
 
-template void
-destroyManagedArray<int>(int*&);
-
 template int*
 copyCreateDevice2HostArray<int>(const int*, const stdgpu::index64_t, const MemoryCopy);
 
@@ -100,8 +94,6 @@ template struct safe_device_allocator<int>;
 
 template struct safe_host_allocator<int>;
 
-template struct safe_managed_allocator<int>;
-
 template struct allocator_traits<safe_device_allocator<int>>;
 
 template STDGPU_HOST_DEVICE void
@@ -214,28 +206,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, dynamic_memory_type_host)
     destroyHostArray<int>(array_host);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, dynamic_memory_type_managed_on_device)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 0;
-    int* array_managed = createManagedArray<int>(size, default_value, Initialization::DEVICE);
-
-    EXPECT_EQ(stdgpu::get_dynamic_memory_type(array_managed), stdgpu::dynamic_memory_type::managed);
-
-    destroyManagedArray<int>(array_managed);
-}
-
-TEST_F(STDGPU_MEMORY_TEST_CLASS, dynamic_memory_type_managed_on_host)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 0;
-    int* array_managed = createManagedArray<int>(size, default_value, Initialization::HOST);
-
-    EXPECT_EQ(stdgpu::get_dynamic_memory_type(array_managed), stdgpu::dynamic_memory_type::managed);
-
-    destroyManagedArray<int>(array_managed);
-}
-
 TEST_F(STDGPU_MEMORY_TEST_CLASS, dynamic_memory_type_invalid_pointer)
 {
     // NOLINTNEXTLINE(readability-magic-numbers,cppcoreguidelines-avoid-magic-numbers)
@@ -276,28 +246,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, size_bytes_host)
     destroyHostArray<int>(array_host);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, size_bytes_managed_device)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 0;
-    int* array_managed = createManagedArray<int>(size, default_value, Initialization::DEVICE);
-
-    EXPECT_EQ(stdgpu::size_bytes(array_managed), size * static_cast<stdgpu::index64_t>(sizeof(int)));
-
-    destroyManagedArray<int>(array_managed);
-}
-
-TEST_F(STDGPU_MEMORY_TEST_CLASS, size_bytes_manged_host)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 0;
-    int* array_managed = createManagedArray<int>(size, default_value, Initialization::HOST);
-
-    EXPECT_EQ(stdgpu::size_bytes(array_managed), size * static_cast<stdgpu::index64_t>(sizeof(int)));
-
-    destroyManagedArray<int>(array_managed);
-}
-
 TEST_F(STDGPU_MEMORY_TEST_CLASS, size_bytes_nullptr)
 {
     EXPECT_EQ(stdgpu::size_bytes<int*>(nullptr), static_cast<stdgpu::index64_t>(0));
@@ -325,30 +273,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, size_bytes_host_shifted)
     destroyHostArray<int>(array_host);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, size_bytes_managed_device_shifted)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 0;
-    int* array_managed = createManagedArray<int>(size, default_value, Initialization::DEVICE);
-
-    const stdgpu::index64_t offset = 24;
-    EXPECT_EQ(stdgpu::size_bytes(array_managed + offset), static_cast<stdgpu::index64_t>(0));
-
-    destroyManagedArray<int>(array_managed);
-}
-
-TEST_F(STDGPU_MEMORY_TEST_CLASS, size_bytes_managed_host_shifted)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 0;
-    int* array_managed = createManagedArray<int>(size, default_value, Initialization::HOST);
-
-    const stdgpu::index64_t offset = 24;
-    EXPECT_EQ(stdgpu::size_bytes(array_managed + offset), static_cast<stdgpu::index64_t>(0));
-
-    destroyManagedArray<int>(array_managed);
-}
-
 TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyDeviceArray_empty)
 {
     int* array_device = createDeviceArray<int>(0, 0);
@@ -371,21 +295,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyHostArray_empty)
     EXPECT_EQ(array_host, nullptr);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyManagedArray_empty)
-{
-    int* array_managed_device = createManagedArray<int>(0, 0, Initialization::DEVICE);
-    int* array_managed_host = createManagedArray<int>(0, 0, Initialization::HOST);
-
-    EXPECT_EQ(array_managed_device, nullptr);
-    EXPECT_EQ(array_managed_host, nullptr);
-
-    destroyManagedArray<int>(array_managed_device);
-    destroyManagedArray<int>(array_managed_host);
-
-    EXPECT_EQ(array_managed_device, nullptr);
-    EXPECT_EQ(array_managed_host, nullptr);
-}
-
 namespace
 {
 void
@@ -431,36 +340,6 @@ createAndDestroyHostFunction(const stdgpu::index_t iterations)
         EXPECT_EQ(array_host, nullptr);
     }
 }
-
-void
-createAndDestroyManagedFunction(const stdgpu::index_t iterations)
-{
-    for (stdgpu::index_t i = 0; i < iterations; ++i)
-    {
-        const stdgpu::index64_t size = 42;
-        const int default_value = 10;
-
-        int* array_managed_device = createManagedArray<int>(size, default_value, Initialization::DEVICE);
-        int* array_managed_host = createManagedArray<int>(size, default_value, Initialization::HOST);
-
-#if STDGPU_DETAIL_IS_DEVICE_COMPILED
-        EXPECT_TRUE(equal_value(stdgpu::execution::device,
-                                stdgpu::device_cbegin(array_managed_device),
-                                stdgpu::device_cend(array_managed_device),
-                                default_value));
-#endif
-        EXPECT_TRUE(equal_value(stdgpu::execution::host,
-                                stdgpu::host_cbegin(array_managed_host),
-                                stdgpu::host_cend(array_managed_host),
-                                default_value));
-
-        destroyManagedArray<int>(array_managed_device);
-        destroyManagedArray<int>(array_managed_host);
-
-        EXPECT_EQ(array_managed_device, nullptr);
-        EXPECT_EQ(array_managed_host, nullptr);
-    }
-}
 } // namespace
 
 TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyDeviceArray)
@@ -513,34 +392,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyHostArray_parallel)
     test_utils::for_each_concurrent_thread(&createAndDestroyHostFunction, iterations_per_thread);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyManagedArray)
-{
-    createAndDestroyManagedFunction(1);
-}
-
-TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyManagedArray_const_type)
-{
-    using T = stdgpu::pair<const int, float>;
-    const T default_value = { 10, 2.0F };
-    const stdgpu::index64_t size = 42;
-
-    T* array_managed = createManagedArray<T>(size, default_value);
-
-    destroyManagedArray<T>(array_managed);
-
-    EXPECT_EQ(array_managed, nullptr);
-}
-
-/*
-TEST_F(STDGPU_MEMORY_TEST_CLASS, createDestroyManagedArray_parallel)
-{
-    const stdgpu::index_t iterations_per_thread = static_cast<stdgpu::index_t>(pow(2, 7));
-
-    test_utils::for_each_concurrent_thread(&createAndDestroyManagedFunction,
-                                           iterations_per_thread);
-}
-*/
-
 TEST_F(STDGPU_MEMORY_TEST_CLASS, copyCreateHost2HostArray_empty)
 {
     int* array_host = createHostArray<int>(0, 0);
@@ -1144,22 +995,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, destroyHostArray_double_free)
     destroyHostArray<int>(array_host_2);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, destroyManangedArray_double_free)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 10;
-
-    int* array_managed_device = createManagedArray<int>(size, default_value, Initialization::DEVICE);
-    int* array_managed_host = createManagedArray<int>(size, default_value, Initialization::HOST);
-    int* array_managed_device_2 = array_managed_device;
-    int* array_managed_host_2 = array_managed_host;
-
-    destroyManagedArray<int>(array_managed_device);
-    destroyManagedArray<int>(array_managed_device_2);
-    destroyManagedArray<int>(array_managed_host);
-    destroyManagedArray<int>(array_managed_host_2);
-}
-
 TEST_F(STDGPU_MEMORY_TEST_CLASS, destroyDeviceArray_double_free_shifted)
 {
     const stdgpu::index64_t size = 42;
@@ -1186,23 +1021,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, destroyHostArray_double_free_shifted)
     destroyHostArray<int>(array_host_2);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, destroyManangedArray_double_free_shifted)
-{
-    const stdgpu::index64_t size = 42;
-    const int default_value = 10;
-    const stdgpu::index_t offset = 24;
-
-    int* array_managed_device = createManagedArray<int>(size, default_value, Initialization::DEVICE);
-    int* array_managed_host = createManagedArray<int>(size, default_value, Initialization::HOST);
-    int* array_managed_device_2 = array_managed_device + offset;
-    int* array_managed_host_2 = array_managed_host + offset;
-
-    destroyManagedArray<int>(array_managed_device);
-    destroyManagedArray<int>(array_managed_device_2);
-    destroyManagedArray<int>(array_managed_host);
-    destroyManagedArray<int>(array_managed_host_2);
-}
-
 template <typename T, typename Allocator = stdgpu::safe_device_allocator<T>>
 class TestContainer
 {
@@ -1487,22 +1305,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, safe_host_allocator)
     a.deallocate(array, size);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, safe_managed_allocator)
-{
-    stdgpu::safe_managed_allocator<int> a;
-    const stdgpu::index64_t size = 42;
-
-    int* array = a.allocate(size);
-
-    const int default_value = 10;
-    stdgpu::fill(stdgpu::execution::host, stdgpu::host_begin(array), stdgpu::host_end(array), default_value);
-
-    EXPECT_TRUE(
-            equal_value(stdgpu::execution::host, stdgpu::host_cbegin(array), stdgpu::host_cend(array), default_value));
-
-    a.deallocate(array, size);
-}
-
 namespace
 {
 class Counter
@@ -1692,27 +1494,6 @@ TEST_F(STDGPU_MEMORY_TEST_CLASS, allocator_traits_rebind_host)
     stdgpu::allocator_traits<Allocator>::deallocate(a, array, size);
 }
 
-TEST_F(STDGPU_MEMORY_TEST_CLASS, allocator_traits_rebind_managed)
-{
-    using Allocator_original = stdgpu::safe_managed_allocator<float>;
-    Allocator_original ao;
-
-    using Allocator = typename stdgpu::allocator_traits<Allocator_original>::rebind_alloc<int>;
-    Allocator a(ao);
-
-    const stdgpu::index64_t size = 42;
-
-    int* array = stdgpu::allocator_traits<Allocator>::allocate(a, size);
-
-    const int default_value = 10;
-    stdgpu::fill(stdgpu::execution::host, stdgpu::host_begin(array), stdgpu::host_end(array), default_value);
-
-    EXPECT_TRUE(
-            equal_value(stdgpu::execution::host, stdgpu::host_cbegin(array), stdgpu::host_cend(array), default_value));
-
-    stdgpu::allocator_traits<Allocator>::deallocate(a, array, size);
-}
-
 TEST_F(STDGPU_MEMORY_TEST_CLASS, create_destroy_nontrivial)
 {
     const stdgpu::index64_t size = 42;