Skip to content

Commit

Permalink
Add setting to cache the GPUMemory mappings for pipeline upload
Browse files Browse the repository at this point in the history
During pipeline upload, the GpuMemory is allocated from the pool. This
memory is mapped, the pipeline is uploaded, and then the memory is unmapped.
In applications that create many pipelines during rendering, this causes
many map/unmap calls and slows down pipeline upload.

Add a setting that caches the GpuMemory mapping so that it can be
reused by subsequent pipeline uploads.
  • Loading branch information
samikhawaja committed Jul 12, 2021
1 parent 1311c5c commit 749cfb4
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 3 deletions.
16 changes: 16 additions & 0 deletions src/core/g_palSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ void SettingsLoader::SetupDefaults()
m_settings.overlayReportMes = true;
m_settings.mipGenUseFastPath = false;
m_settings.useFp16GenMips = false;
m_settings.keepGPUMemPoolMappedForPipelineCreation = false;
m_settings.tmzEnabled = true;
#if PAL_DEVELOPER_BUILD
m_settings.dbgHelperBits = 0x0;
Expand Down Expand Up @@ -610,6 +611,11 @@ void SettingsLoader::ReadSettings()
&m_settings.useFp16GenMips,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pkeepGPUMemPoolMappedForPipelineCreationStr,
Util::ValueType::Boolean,
&m_settings.keepGPUMemPoolMappedForPipelineCreation,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pTmzEnabledStr,
Util::ValueType::Boolean,
&m_settings.tmzEnabled,
Expand Down Expand Up @@ -655,6 +661,11 @@ void SettingsLoader::RereadSettings()
&m_settings.useFp16GenMips,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pkeepGPUMemPoolMappedForPipelineCreationStr,
Util::ValueType::Boolean,
&m_settings.keepGPUMemPoolMappedForPipelineCreation,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pUseDccStr,
Util::ValueType::Uint,
&m_settings.useDcc,
Expand Down Expand Up @@ -1100,6 +1111,11 @@ void SettingsLoader::InitSettingsInfo()
info.valueSize = sizeof(m_settings.useFp16GenMips);
m_settingsInfoMap.Insert(192229910, info);

info.type = SettingType::Boolean;
info.pValuePtr = &m_settings.keepGPUMemPoolMappedForPipelineCreation;
info.valueSize = sizeof(m_settings.keepGPUMemPoolMappedForPipelineCreation);
m_settingsInfoMap.Insert(3985380410, info);

info.type = SettingType::Boolean;
info.pValuePtr = &m_settings.tmzEnabled;
info.valueSize = sizeof(m_settings.tmzEnabled);
Expand Down
2 changes: 2 additions & 0 deletions src/core/g_palSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ struct PalSettings : public Pal::DriverSettings
bool overlayReportMes;
bool mipGenUseFastPath;
bool useFp16GenMips;
bool keepGPUMemPoolMappedForPipelineCreation;
bool tmzEnabled;
#if PAL_DEVELOPER_BUILD
uint64 dbgHelperBits;
Expand Down Expand Up @@ -378,6 +379,7 @@ static const char* pDebugForceResourceAdditionalPaddingStr = "#3601080919";
static const char* pOverlayReportMesStr = "#1685803860";
static const char* pMipGenUseFastPathStr = "#3353227045";
static const char* pUseFp16GenMipsStr = "#192229910";
static const char* pkeepGPUMemPoolMappedForPipelineCreationStr = "#3985380410";
static const char* pTmzEnabledStr = "#2606194033";
#if PAL_DEVELOPER_BUILD
static const char* pDbgHelperBitsStr = "#3894710420";
Expand Down
16 changes: 14 additions & 2 deletions src/core/hw/gfxip/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,16 @@ Result PipelineUploader::UploadUsingCpu(
const SectionAddressCalculator& addressCalc,
void** ppMappedPtr)
{
Result result = m_pGpuMemory->Map(&m_pMappedPtr);
Result result;
if (m_pDevice->Settings().keepGPUMemPoolMappedForPipelineCreation == false)
{
result = m_pGpuMemory->Map(&m_pMappedPtr);
}
else
{
result = m_pDevice->MemMgr()->Map(m_pGpuMemory, &m_pMappedPtr);
}

if (result == Result::Success)
{
m_pMappedPtr = VoidPtrInc(m_pMappedPtr, static_cast<size_t>(m_baseOffset));
Expand Down Expand Up @@ -1104,7 +1113,10 @@ Result PipelineUploader::End(
else
{
PAL_ASSERT(m_pMappedPtr != nullptr);
result = m_pGpuMemory->Unmap();
if (m_pDevice->Settings().keepGPUMemPoolMappedForPipelineCreation == false)
{
m_pGpuMemory->Unmap();
}
}

m_pMappedPtr = nullptr;
Expand Down
63 changes: 63 additions & 0 deletions src/core/internalMemMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,18 @@ InternalMemMgr::InternalMemMgr(
// Explicitly frees all GPU memory allocations.
void InternalMemMgr::FreeAllocations()
{

for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
{
PAL_ASSERT((it.Get() != nullptr) && (it.Get()->pBuddyAllocator != nullptr));

if (it.Get()->mapped && it.Get()->pGpuMemory != nullptr)
{
it.Get()->pGpuMemory->Unmap();
it.Get()->mapped = false;
}
}

// Delete the GPU memory objects using the references list
while (m_references.NumElements() != 0)
{
Expand Down Expand Up @@ -563,4 +575,55 @@ uint32 InternalMemMgr::GetReferencesCount()
return static_cast<uint32>(m_references.NumElements());
}

// =====================================================================================================================
// Maps pGpuMemory for CPU access and returns the CPU address through ppData.
//
// If the memory was buddy-allocated, the owning pool is looked up in m_poolList. The pool's base allocation is mapped
// the first time it is requested and the resulting pointer is cached in the pool (pData / mapped), so later calls for
// the same pool return the cached pointer without mapping again. The returned address is the pool's cached pointer;
// this function applies no offset to it.
//
// If the memory was not buddy-allocated, the call is forwarded directly to GpuMemory::Map() and nothing is cached.
//
// Returns Result::Success on success, the error reported by GpuMemory::Map() if the mapping itself fails, or
// Result::ErrorInvalidValue if the memory claims to be buddy-allocated but no pool in m_poolList owns it. *ppData is
// only written by this function on the pooled path when the call succeeds.
Result InternalMemMgr::Map(
    GpuMemory* pGpuMemory,
    void**     ppData)
{
    Util::MutexAuto allocatorLock(&m_allocatorLock); // Ensure thread-safety using the lock
    PAL_ASSERT((pGpuMemory != nullptr) && (ppData != nullptr));

    Result result = Result::ErrorInvalidValue;

    if (pGpuMemory->WasBuddyAllocated())
    {
        // Tracked separately from 'result' so that a failing driver Map() is not mistaken for a missing pool.
        bool poolFound = false;

        // Try to find the allocation in the pool list
        for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
        {
            GpuMemoryPool* pPool = it.Get();

            PAL_ASSERT((pPool->pGpuMemory != nullptr) && (pPool->pBuddyAllocator != nullptr));

            if (pPool->pGpuMemory == pGpuMemory)
            {
                poolFound = true;
                result    = Result::Success;

                if (pPool->mapped == false)
                {
                    // Map into a local first so a failed Map() cannot clobber the pool's cached pointer.
                    void* pMapped = nullptr;
                    result = pPool->pGpuMemory->Map(&pMapped);

                    if (result == Result::Success)
                    {
                        pPool->pData  = pMapped;
                        pPool->mapped = true;
                    }
                }

                if (result == Result::Success)
                {
                    *ppData = pPool->pData;
                }

                break;
            }
        }

        // If we didn't find the allocation in the pool list then something went wrong with the allocation scheme.
        // A Map() failure on a pool that was found is a runtime error reported through 'result', not an assert.
        PAL_ASSERT(poolFound);
    }
    else
    {
        // Not sub-allocated from a pool: there is no pool entry to cache the mapping in, so map directly.
        result = pGpuMemory->Map(ppData);
    }

    return result;
}

// =====================================================================================================================
// Counterpart of InternalMemMgr::Map().
//
// For buddy-allocated memory this is intentionally a no-op: the pool's mapping is cached by Map() so it can be reused,
// and it is released when FreeAllocations() unmaps every pool that is still flagged as mapped.
//
// For memory that was not buddy-allocated, Map() performs a real GpuMemory::Map() and caches nothing, so the mapping
// must be released here; otherwise it could never be undone through this manager.
//
// Returns Result::Success for pooled (or null) memory, otherwise the result of GpuMemory::Unmap().
Result InternalMemMgr::Unmap(
    GpuMemory* pGpuMemory)
{
    Result result = Result::Success;

    if ((pGpuMemory != nullptr) && (pGpuMemory->WasBuddyAllocated() == false))
    {
        result = pGpuMemory->Unmap();
    }

    return result;
}


} // Pal
10 changes: 10 additions & 0 deletions src/core/internalMemMgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ struct GpuMemoryPool
uint64 pagingFenceVal; // Paging fence value

Util::BuddyAllocator<Platform>* pBuddyAllocator; // Buddy allocator used for the suballocation
bool mapped; // flag to check if this pool is already mapped
void* pData; // address of the already existing mapping
};

// =====================================================================================================================
Expand Down Expand Up @@ -115,6 +117,14 @@ class InternalMemMgr
// Number of all allocations in the reference list. Note that this function takes the reference list lock.
uint32 GetReferencesCount();

Result Map(
GpuMemory* pGpuMemory,
void** ppData);

Result Unmap(
GpuMemory* pGpuMemory);


private:
Result AllocateBaseGpuMem(
const GpuMemoryCreateInfo& createInfo,
Expand Down
19 changes: 18 additions & 1 deletion src/core/settings_core.json
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,23 @@
"VariableName": "useFp16GenMips",
"Description": "If mipGenUseFastPath == true and this is true - use the fp16 single-pass GenMips compute pass."
},
{
"Name": "keepGPUMemPoolMappedForPipelineCreation",
"Tags": [
"Resource Settings",
"Performance"
],
"Defaults": {
"Default": false
},
"Flags": {
"RereadSetting": true
},
"Scope": "PrivatePalKey",
"Type": "bool",
"VariableName": "keepGPUMemPoolMappedForPipelineCreation",
"Description": "If keepGPUMemPoolMappedForPipelineCreation == true the mapped gpu memory for pipeline creation will not be unmapped. This mapped memory can be reused later by another pipeline creation."
},
{
"Name": "TmzEnabled",
"Tags": [
Expand Down Expand Up @@ -2019,4 +2036,4 @@
"Description": "Maximum string length for a miscellaneous string setting"
}
]
}
}

0 comments on commit 749cfb4

Please sign in to comment.