diff --git a/src/core/g_palSettings.cpp b/src/core/g_palSettings.cpp
index d7668139..33e2588c 100644
--- a/src/core/g_palSettings.cpp
+++ b/src/core/g_palSettings.cpp
@@ -162,6 +162,7 @@ void SettingsLoader::SetupDefaults()
     m_settings.overlayReportMes = true;
     m_settings.mipGenUseFastPath = false;
     m_settings.useFp16GenMips = false;
+    m_settings.keepGPUMemPoolMappedForPipelineCreation = false;
     m_settings.tmzEnabled = true;
 #if PAL_DEVELOPER_BUILD
     m_settings.dbgHelperBits = 0x0;
@@ -610,6 +611,11 @@ void SettingsLoader::ReadSettings()
                            &m_settings.useFp16GenMips,
                            InternalSettingScope::PrivatePalKey);
 
+    static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pkeepGPUMemPoolMappedForPipelineCreationStr,
+                           Util::ValueType::Boolean,
+                           &m_settings.keepGPUMemPoolMappedForPipelineCreation,
+                           InternalSettingScope::PrivatePalKey);
+
     static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pTmzEnabledStr,
                            Util::ValueType::Boolean,
                            &m_settings.tmzEnabled,
@@ -655,6 +661,11 @@ void SettingsLoader::RereadSettings()
                            &m_settings.useFp16GenMips,
                            InternalSettingScope::PrivatePalKey);
 
+    static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pkeepGPUMemPoolMappedForPipelineCreationStr,
+                           Util::ValueType::Boolean,
+                           &m_settings.keepGPUMemPoolMappedForPipelineCreation,
+                           InternalSettingScope::PrivatePalKey);
+
     static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pUseDccStr,
                            Util::ValueType::Uint,
                            &m_settings.useDcc,
@@ -1100,6 +1111,11 @@ void SettingsLoader::InitSettingsInfo()
     info.valueSize = sizeof(m_settings.useFp16GenMips);
     m_settingsInfoMap.Insert(192229910, info);
 
+    info.type = SettingType::Boolean;
+    info.pValuePtr = &m_settings.keepGPUMemPoolMappedForPipelineCreation;
+    info.valueSize = sizeof(m_settings.keepGPUMemPoolMappedForPipelineCreation);
+    m_settingsInfoMap.Insert(3985380410, info);
+
     info.type = SettingType::Boolean;
     info.pValuePtr = &m_settings.tmzEnabled;
     info.valueSize = sizeof(m_settings.tmzEnabled);
diff --git a/src/core/g_palSettings.h b/src/core/g_palSettings.h
index 02107710..a8d4af0d 100644
--- a/src/core/g_palSettings.h
+++ b/src/core/g_palSettings.h
@@ -279,6 +279,7 @@ struct PalSettings : public Pal::DriverSettings
     bool overlayReportMes;
     bool mipGenUseFastPath;
     bool useFp16GenMips;
+    bool keepGPUMemPoolMappedForPipelineCreation;
     bool tmzEnabled;
 #if PAL_DEVELOPER_BUILD
     uint64 dbgHelperBits;
@@ -378,6 +379,7 @@ static const char* pDebugForceResourceAdditionalPaddingStr = "#3601080919";
 static const char* pOverlayReportMesStr = "#1685803860";
 static const char* pMipGenUseFastPathStr = "#3353227045";
 static const char* pUseFp16GenMipsStr = "#192229910";
+static const char* pkeepGPUMemPoolMappedForPipelineCreationStr = "#3985380410";
 static const char* pTmzEnabledStr = "#2606194033";
 #if PAL_DEVELOPER_BUILD
 static const char* pDbgHelperBitsStr = "#3894710420";
diff --git a/src/core/hw/gfxip/pipeline.cpp b/src/core/hw/gfxip/pipeline.cpp
index cb6a4962..a1c79efa 100644
--- a/src/core/hw/gfxip/pipeline.cpp
+++ b/src/core/hw/gfxip/pipeline.cpp
@@ -881,7 +881,17 @@ Result PipelineUploader::UploadUsingCpu(
     const SectionAddressCalculator& addressCalc,
     void** ppMappedPtr)
 {
-    Result result = m_pGpuMemory->Map(&m_pMappedPtr);
+    Result result;
+    if (m_pDevice->Settings().keepGPUMemPoolMappedForPipelineCreation == false)
+    {
+        result = m_pGpuMemory->Map(&m_pMappedPtr);
+    }
+    else
+    {
+        // The memory manager maps a pool once and hands the cached mapping to every later caller.
+        result = m_pDevice->MemMgr()->Map(m_pGpuMemory, &m_pMappedPtr);
+    }
+
     if (result == Result::Success)
     {
         m_pMappedPtr = VoidPtrInc(m_pMappedPtr, static_cast<size_t>(m_baseOffset));
@@ -1104,7 +1114,15 @@ Result PipelineUploader::End(
     else
     {
         PAL_ASSERT(m_pMappedPtr != nullptr);
-        result = m_pGpuMemory->Unmap();
+        if (m_pDevice->Settings().keepGPUMemPoolMappedForPipelineCreation == false)
+        {
+            result = m_pGpuMemory->Unmap();
+        }
+        else
+        {
+            // Releases only mappings InternalMemMgr::Map() made directly; pool mappings stay alive for reuse.
+            result = m_pDevice->MemMgr()->Unmap(m_pGpuMemory);
+        }
     }
 
     m_pMappedPtr = nullptr;
diff --git a/src/core/internalMemMgr.cpp b/src/core/internalMemMgr.cpp
index d39ebdee..15957701 100644
--- a/src/core/internalMemMgr.cpp
+++ b/src/core/internalMemMgr.cpp
@@ -138,6 +138,20 @@ InternalMemMgr::InternalMemMgr(
 // Explicitly frees all GPU memory allocations.
 void InternalMemMgr::FreeAllocations()
 {
+    // Release any pool mapping that Map() left in place before the GPU memory objects are deleted below.
+    for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
+    {
+        GpuMemoryPool* pPool = it.Get();
+        PAL_ASSERT(pPool->pBuddyAllocator != nullptr);
+
+        if (pPool->mapped && (pPool->pGpuMemory != nullptr))
+        {
+            pPool->pGpuMemory->Unmap();
+            pPool->mapped = false;
+            pPool->pData  = nullptr;
+        }
+    }
+
     // Delete the GPU memory objects using the references list
     while (m_references.NumElements() != 0)
     {
@@ -563,4 +577,65 @@ uint32 InternalMemMgr::GetReferencesCount()
     return static_cast<uint32>(m_references.NumElements());
 }
 
+// =====================================================================================================================
+// Maps GPU memory for CPU access. Buddy-allocated memory is mapped once per pool and the mapping is cached in the pool
+// for reuse by later callers; any other memory is mapped directly and is released again by Unmap().
+Result InternalMemMgr::Map(
+    GpuMemory* pGpuMemory,
+    void**     ppData)
+{
+    // Hold the allocator lock while walking the pool list and touching the cached per-pool mappings.
+    Util::MutexAuto allocatorLock(&m_allocatorLock);
+    PAL_ASSERT((pGpuMemory != nullptr) && (ppData != nullptr));
+
+    Result result = Result::ErrorInvalidValue;
+
+    if (pGpuMemory->WasBuddyAllocated())
+    {
+        bool poolFound = false;
+
+        for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
+        {
+            GpuMemoryPool* pPool = it.Get();
+            PAL_ASSERT((pPool->pGpuMemory != nullptr) && (pPool->pBuddyAllocator != nullptr));
+
+            if (pPool->pGpuMemory == pGpuMemory)
+            {
+                poolFound = true;
+
+                // Only the first caller maps the pool; a failed attempt leaves it unmapped so it can be retried.
+                result = pPool->mapped ? Result::Success : pPool->pGpuMemory->Map(&pPool->pData);
+
+                if (result == Result::Success)
+                {
+                    pPool->mapped = true;
+                    *ppData       = pPool->pData;
+                }
+
+                break;
+            }
+        }
+
+        // Buddy-allocated memory that belongs to no pool means something went wrong with the allocation scheme.
+        PAL_ASSERT(poolFound);
+    }
+    else
+    {
+        result = pGpuMemory->Map(ppData);
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+// Releases a mapping obtained from Map(). Pool mappings are intentionally kept alive until FreeAllocations() so they
+// can be reused; only memory that Map() mapped directly is unmapped here.
+Result InternalMemMgr::Unmap(
+    GpuMemory* pGpuMemory)
+{
+    PAL_ASSERT(pGpuMemory != nullptr);
+
+    return pGpuMemory->WasBuddyAllocated() ? Result::Success : pGpuMemory->Unmap();
+}
+
 } // Pal
diff --git a/src/core/internalMemMgr.h b/src/core/internalMemMgr.h
index 2c9541d8..12ff693a 100644
--- a/src/core/internalMemMgr.h
+++ b/src/core/internalMemMgr.h
@@ -56,6 +56,8 @@ struct GpuMemoryPool
     uint64 pagingFenceVal; // Paging fence value
 
     Util::BuddyAllocator* pBuddyAllocator; // Buddy allocator used for the suballocation
+    bool mapped; // True while InternalMemMgr::Map() holds a CPU mapping of this pool
+    void* pData; // CPU address of that mapping; only meaningful while 'mapped' is true
 };
 
 // =====================================================================================================================
@@ -115,6 +117,15 @@ class InternalMemMgr
     // Number of all allocations in the reference list. Note that this function takes the reference list lock.
     uint32 GetReferencesCount();
 
+    // Maps GPU memory for CPU access. Buddy-allocated pools are mapped once and the mapping is reused.
+    Result Map(
+        GpuMemory* pGpuMemory,
+        void**     ppData);
+
+    // Releases a mapping made by Map(). Pool mappings are kept until FreeAllocations().
+    Result Unmap(
+        GpuMemory* pGpuMemory);
+
 private:
     Result AllocateBaseGpuMem(
         const GpuMemoryCreateInfo& createInfo,
diff --git a/src/core/settings_core.json b/src/core/settings_core.json
index 14242a5b..73e33ac1 100644
--- a/src/core/settings_core.json
+++ b/src/core/settings_core.json
@@ -1881,6 +1881,23 @@
       "VariableName": "useFp16GenMips",
       "Description": "If mipGenUseFastPath == true and this is true - use the fp16 single-pass GenMips compute pass."
     },
+    {
+      "Name": "keepGPUMemPoolMappedForPipelineCreation",
+      "Tags": [
+        "Resource Settings",
+        "Performance"
+      ],
+      "Defaults": {
+        "Default": false
+      },
+      "Flags": {
+        "RereadSetting": true
+      },
+      "Scope": "PrivatePalKey",
+      "Type": "bool",
+      "VariableName": "keepGPUMemPoolMappedForPipelineCreation",
+      "Description": "If keepGPUMemPoolMappedForPipelineCreation == true the mapped gpu memory for pipeline creation will not be unmapped. This mapped memory can be reused later by another pipeline creation."
+    },
     {
       "Name": "TmzEnabled",
       "Tags": [
@@ -2019,4 +2036,4 @@
       "Description": "Maximum string length for a miscellaneous string setting"
     }
   ]
-}
\ No newline at end of file
+}