From 749cfb4420ffe28241aafab979d92ff856626cc2 Mon Sep 17 00:00:00 2001
From: Samiullah Khawaja
Date: Mon, 12 Jul 2021 14:53:41 +0000
Subject: [PATCH] Add setting to cache the GpuMemory mappings for pipeline
 upload

During pipeline upload the GpuMemory is allocated from the pool. This memory
is mapped, the pipeline is uploaded, and then the memory is unmapped. In
applications that create many pipelines during rendering, this results in
many map/unmap calls and slows down pipeline upload.

Add a setting that allows the GpuMemory mapping to be cached so that it can
be reused by subsequent pipeline uploads.
---
 src/core/g_palSettings.cpp     | 16 +++++++++
 src/core/g_palSettings.h       |  2 ++
 src/core/hw/gfxip/pipeline.cpp | 16 +++++++--
 src/core/internalMemMgr.cpp    | 63 ++++++++++++++++++++++++++++++++++
 src/core/internalMemMgr.h      | 10 ++++++
 src/core/settings_core.json    | 19 +++++++++-
 6 files changed, 123 insertions(+), 3 deletions(-)

diff --git a/src/core/g_palSettings.cpp b/src/core/g_palSettings.cpp
index d7668139..33e2588c 100644
--- a/src/core/g_palSettings.cpp
+++ b/src/core/g_palSettings.cpp
@@ -162,6 +162,7 @@ void SettingsLoader::SetupDefaults()
     m_settings.overlayReportMes = true;
     m_settings.mipGenUseFastPath = false;
     m_settings.useFp16GenMips = false;
+    m_settings.keepGPUMemPoolMappedForPipelineCreation = false;
     m_settings.tmzEnabled = true;
 #if PAL_DEVELOPER_BUILD
     m_settings.dbgHelperBits = 0x0;
@@ -610,6 +611,11 @@ void SettingsLoader::ReadSettings()
                            &m_settings.useFp16GenMips,
                            InternalSettingScope::PrivatePalKey);
+    static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pkeepGPUMemPoolMappedForPipelineCreationStr,
+                           Util::ValueType::Boolean,
+                           &m_settings.keepGPUMemPoolMappedForPipelineCreation,
+                           InternalSettingScope::PrivatePalKey);
+
     static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pTmzEnabledStr,
                            Util::ValueType::Boolean,
                            &m_settings.tmzEnabled,
                            InternalSettingScope::PrivatePalKey);
@@ -655,6 +661,11 @@ void SettingsLoader::RereadSettings()
                            &m_settings.useFp16GenMips,
                            InternalSettingScope::PrivatePalKey);
+    static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pkeepGPUMemPoolMappedForPipelineCreationStr,
+                           Util::ValueType::Boolean,
+                           &m_settings.keepGPUMemPoolMappedForPipelineCreation,
+                           InternalSettingScope::PrivatePalKey);
+
     static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pUseDccStr,
                            Util::ValueType::Uint,
                            &m_settings.useDcc,
                            InternalSettingScope::PrivatePalKey);
@@ -1100,6 +1111,11 @@ void SettingsLoader::InitSettingsInfo()
     info.valueSize = sizeof(m_settings.useFp16GenMips);
     m_settingsInfoMap.Insert(192229910, info);
 
+    info.type = SettingType::Boolean;
+    info.pValuePtr = &m_settings.keepGPUMemPoolMappedForPipelineCreation;
+    info.valueSize = sizeof(m_settings.keepGPUMemPoolMappedForPipelineCreation);
+    m_settingsInfoMap.Insert(3985380410, info);
+
     info.type = SettingType::Boolean;
     info.pValuePtr = &m_settings.tmzEnabled;
     info.valueSize = sizeof(m_settings.tmzEnabled);
diff --git a/src/core/g_palSettings.h b/src/core/g_palSettings.h
index 02107710..a8d4af0d 100644
--- a/src/core/g_palSettings.h
+++ b/src/core/g_palSettings.h
@@ -279,6 +279,7 @@ struct PalSettings : public Pal::DriverSettings
     bool overlayReportMes;
     bool mipGenUseFastPath;
     bool useFp16GenMips;
+    bool keepGPUMemPoolMappedForPipelineCreation;
     bool tmzEnabled;
 #if PAL_DEVELOPER_BUILD
     uint64 dbgHelperBits;
@@ -378,6 +379,7 @@ static const char* pDebugForceResourceAdditionalPaddingStr = "#3601080919";
 static const char* pOverlayReportMesStr = "#1685803860";
 static const char* pMipGenUseFastPathStr = "#3353227045";
 static const char* pUseFp16GenMipsStr = "#192229910";
+static const char* pkeepGPUMemPoolMappedForPipelineCreationStr = "#3985380410";
 static const char* pTmzEnabledStr = "#2606194033";
 #if PAL_DEVELOPER_BUILD
 static const char* pDbgHelperBitsStr = "#3894710420";
diff --git a/src/core/hw/gfxip/pipeline.cpp b/src/core/hw/gfxip/pipeline.cpp
index cb6a4962..a1c79efa 100644
--- a/src/core/hw/gfxip/pipeline.cpp
+++ b/src/core/hw/gfxip/pipeline.cpp
@@ -881,7 +881,16 @@ Result PipelineUploader::UploadUsingCpu(
     const SectionAddressCalculator& addressCalc,
     void**                          ppMappedPtr)
 {
-    Result result = m_pGpuMemory->Map(&m_pMappedPtr);
+    Result result;
+    if (m_pDevice->Settings().keepGPUMemPoolMappedForPipelineCreation == false)
+    {
+        result = m_pGpuMemory->Map(&m_pMappedPtr);
+    }
+    else
+    {
+        result = m_pDevice->MemMgr()->Map(m_pGpuMemory, &m_pMappedPtr);
+    }
+
     if (result == Result::Success)
     {
         m_pMappedPtr = VoidPtrInc(m_pMappedPtr, static_cast<size_t>(m_baseOffset));
@@ -1104,7 +1113,10 @@ Result PipelineUploader::End(
     else
     {
         PAL_ASSERT(m_pMappedPtr != nullptr);
-        result = m_pGpuMemory->Unmap();
+        if (m_pDevice->Settings().keepGPUMemPoolMappedForPipelineCreation == false)
+        {
+            result = m_pGpuMemory->Unmap();
+        }
     }
 
     m_pMappedPtr = nullptr;
diff --git a/src/core/internalMemMgr.cpp b/src/core/internalMemMgr.cpp
index d39ebdee..15957701 100644
--- a/src/core/internalMemMgr.cpp
+++ b/src/core/internalMemMgr.cpp
@@ -138,6 +138,18 @@ InternalMemMgr::InternalMemMgr(
 // Explicitly frees all GPU memory allocations.
 void InternalMemMgr::FreeAllocations()
 {
+    // Release any cached CPU mappings before the pools' GPU memory objects are destroyed.
+    for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
+    {
+        PAL_ASSERT((it.Get() != nullptr) && (it.Get()->pBuddyAllocator != nullptr));
+
+        if (it.Get()->mapped && (it.Get()->pGpuMemory != nullptr))
+        {
+            it.Get()->pGpuMemory->Unmap();
+            it.Get()->mapped = false;
+        }
+    }
+
     // Delete the GPU memory objects using the references list
     while (m_references.NumElements() != 0)
     {
@@ -563,4 +575,55 @@ uint32 InternalMemMgr::GetReferencesCount()
     return static_cast<uint32>(m_references.NumElements());
 }
 
+Result InternalMemMgr::Map(
+    GpuMemory* pGpuMemory,
+    void**     ppData)
+{
+    Util::MutexAuto allocatorLock(&m_allocatorLock); // Ensure thread-safety using the lock
+    PAL_ASSERT(pGpuMemory != nullptr);
+
+    Result result = Result::ErrorInvalidValue;
+    if (pGpuMemory->WasBuddyAllocated())
+    {
+        // Try to find the allocation in the pool list
+        for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
+        {
+            GpuMemoryPool* pPool = it.Get();
+
+            PAL_ASSERT((pPool->pGpuMemory != nullptr) && (pPool->pBuddyAllocator != nullptr));
+
+            if (pPool->pGpuMemory == pGpuMemory)
+            {
+                if (!pPool->mapped)
+                {
+                    result = pPool->pGpuMemory->Map(&pPool->pData);
+                    if (result != Result::Success) {
+                        break;
+                    }
+                    pPool->mapped = true;
+                }
+                *ppData = pPool->pData;
+                result = Result::Success;
+                break;
+            }
+        }
+
+        // If we didn't find the allocation in the pool list then something went wrong with the allocation scheme
+        PAL_ASSERT(result == Result::Success);
+    }
+    else
+    {
+        result = pGpuMemory->Map(ppData);
+    }
+
+    return result;
+}
+
+Result InternalMemMgr::Unmap(
+    GpuMemory* pGpuMemory)
+{
+    return Result::Success; // The cached pool mapping is intentionally kept; it is released in FreeAllocations().
+}
+
+
 } // Pal
diff --git a/src/core/internalMemMgr.h b/src/core/internalMemMgr.h
index 2c9541d8..12ff693a 100644
--- a/src/core/internalMemMgr.h
+++ b/src/core/internalMemMgr.h
@@ -56,6 +56,8 @@ struct GpuMemoryPool
     uint64                          pagingFenceVal;   // Paging fence value
 
     Util::BuddyAllocator<Platform>* pBuddyAllocator;  // Buddy allocator used for the suballocation
+    bool                            mapped;           // True if this pool's GPU memory is currently mapped
+    void*                           pData;            // CPU address of the cached mapping (valid only while mapped is true)
 };
 
 // =====================================================================================================================
@@ -115,6 +117,14 @@ class InternalMemMgr
     // Number of all allocations in the reference list. Note that this function takes the reference list lock.
     uint32 GetReferencesCount();
 
+    // Maps/unmaps suballocated GPU memory, reusing the owning pool's cached CPU mapping when one exists.
+    Result Map(
+        GpuMemory* pGpuMemory,
+        void**     ppData);
+
+    Result Unmap(
+        GpuMemory* pGpuMemory);
+
 private:
     Result AllocateBaseGpuMem(
         const GpuMemoryCreateInfo& createInfo,
diff --git a/src/core/settings_core.json b/src/core/settings_core.json
index 14242a5b..73e33ac1 100644
--- a/src/core/settings_core.json
+++ b/src/core/settings_core.json
@@ -1881,6 +1881,23 @@
             "VariableName": "useFp16GenMips",
             "Description": "If mipGenUseFastPath == true and this is true - use the fp16 single-pass GenMips compute pass."
         },
+        {
+            "Name": "keepGPUMemPoolMappedForPipelineCreation",
+            "Tags": [
+                "Resource Settings",
+                "Performance"
+            ],
+            "Defaults": {
+                "Default": false
+            },
+            "Flags": {
+                "RereadSetting": true
+            },
+            "Scope": "PrivatePalKey",
+            "Type": "bool",
+            "VariableName": "keepGPUMemPoolMappedForPipelineCreation",
+            "Description": "If true, the GPU memory that is mapped for pipeline creation is not unmapped after the upload; the cached mapping is reused by subsequent pipeline creations."
+        },
         {
             "Name": "TmzEnabled",
             "Tags": [
@@ -2019,4 +2036,4 @@
             "Description": "Maximum string length for a miscellaneous string setting"
         }
     ]
-}
\ No newline at end of file
+}
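
The mechanism above is small: the first time a suballocation pool's GPU memory is mapped, the pool records the CPU pointer (GpuMemoryPool::mapped / GpuMemoryPool::pData), later pipeline uploads get that cached pointer back from InternalMemMgr::Map() instead of paying for a map/unmap pair each time, and the cached mappings are only torn down in InternalMemMgr::FreeAllocations(). The standalone sketch below illustrates that pattern in isolation; the Pool, MapOnce, and ReleaseAll names are invented for this illustration and are not PAL APIs.

#include <cassert>
#include <cstdio>

// Toy stand-in for GpuMemoryPool: remembers its CPU mapping after the first map.
struct Pool
{
    bool  mapped = false;   // mirrors GpuMemoryPool::mapped in the patch
    void* pData  = nullptr; // mirrors GpuMemoryPool::pData in the patch
    char  backing[256];     // stand-in for the CPU-visible GPU allocation
};

// First call performs the (expensive) map; later calls return the cached pointer.
// This is the behavior InternalMemMgr::Map() adds for buddy-allocated memory.
void* MapOnce(Pool* pPool)
{
    if (pPool->mapped == false)
    {
        pPool->pData  = pPool->backing; // stand-in for GpuMemory::Map()
        pPool->mapped = true;
        std::printf("map\n");
    }
    return pPool->pData;
}

// Deferred unmap, analogous to the loop added to InternalMemMgr::FreeAllocations().
void ReleaseAll(Pool* pPool)
{
    if (pPool->mapped)
    {
        pPool->pData  = nullptr;        // stand-in for GpuMemory::Unmap()
        pPool->mapped = false;
        std::printf("unmap\n");
    }
}

int main()
{
    Pool pool;
    for (int i = 0; i < 3; ++i) // three "pipeline uploads": only the first one maps
    {
        assert(MapOnce(&pool) != nullptr);
    }
    ReleaseAll(&pool); // done once at teardown, like FreeAllocations()
    return 0;
}

The trade-off is that cached mappings hold their CPU virtual address ranges until FreeAllocations() runs, which is presumably why the behavior is opt-in and defaults to false.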