Add setting to cache the GPUMemory mappings for pipeline upload #78

Closed · wants to merge 1 commit
16 changes: 16 additions & 0 deletions src/core/g_palSettings.cpp
@@ -164,6 +164,7 @@ void SettingsLoader::SetupDefaults()
m_settings.overlayReportMes = true;
m_settings.mipGenUseFastPath = false;
m_settings.useFp16GenMips = false;
m_settings.maxMappedPoolsSize = 0;
m_settings.tmzEnabled = true;
#if PAL_DEVELOPER_BUILD
m_settings.dbgHelperBits = 0x0;
@@ -633,6 +634,11 @@ void SettingsLoader::ReadSettings()
&m_settings.useFp16GenMips,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pmaxMappedPoolsSizeStr,
Util::ValueType::Uint64,
&m_settings.maxMappedPoolsSize,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pTmzEnabledStr,
Util::ValueType::Boolean,
&m_settings.tmzEnabled,
@@ -683,6 +689,11 @@ void SettingsLoader::RereadSettings()
&m_settings.useFp16GenMips,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pmaxMappedPoolsSizeStr,
Util::ValueType::Uint64,
&m_settings.maxMappedPoolsSize,
InternalSettingScope::PrivatePalKey);

static_cast<Pal::Device*>(m_pDevice)->ReadSetting(pUseDccStr,
Util::ValueType::Uint,
&m_settings.useDcc,
@@ -1148,6 +1159,11 @@ void SettingsLoader::InitSettingsInfo()
info.valueSize = sizeof(m_settings.useFp16GenMips);
m_settingsInfoMap.Insert(192229910, info);

info.type = SettingType::Uint64;
info.pValuePtr = &m_settings.maxMappedPoolsSize;
info.valueSize = sizeof(m_settings.maxMappedPoolsSize);
m_settingsInfoMap.Insert(3814409436, info);

info.type = SettingType::Boolean;
info.pValuePtr = &m_settings.tmzEnabled;
info.valueSize = sizeof(m_settings.tmzEnabled);
2 changes: 2 additions & 0 deletions src/core/g_palSettings.h
@@ -290,6 +290,7 @@ struct PalSettings : public Pal::DriverSettings
bool overlayReportMes;
bool mipGenUseFastPath;
bool useFp16GenMips;
gpusize maxMappedPoolsSize;
bool tmzEnabled;
#if PAL_DEVELOPER_BUILD
uint64 dbgHelperBits;
@@ -392,6 +393,7 @@ static const char* pDebugForceResourceAdditionalPaddingStr = "#3601080919";
static const char* pOverlayReportMesStr = "#1685803860";
static const char* pMipGenUseFastPathStr = "#3353227045";
static const char* pUseFp16GenMipsStr = "#192229910";
static const char* pmaxMappedPoolsSizeStr = "#3814409436";
static const char* pTmzEnabledStr = "#2606194033";
#if PAL_DEVELOPER_BUILD
static const char* pDbgHelperBitsStr = "#3894710420";
4 changes: 2 additions & 2 deletions src/core/hw/gfxip/pipeline.cpp
@@ -912,7 +912,7 @@ Result PipelineUploader::UploadUsingCpu(
const SectionAddressCalculator& addressCalc,
void** ppMappedPtr)
{
Result result = m_pGpuMemory->Map(&m_pMappedPtr);
Result result = m_pDevice->MemMgr()->Map(m_pGpuMemory, &m_pMappedPtr);
if (result == Result::Success)
{
m_pMappedPtr = VoidPtrInc(m_pMappedPtr, static_cast<size_t>(m_baseOffset));
@@ -1141,7 +1141,7 @@ Result PipelineUploader::End(
else
{
PAL_ASSERT(m_pMappedPtr != nullptr);
result = m_pGpuMemory->Unmap();
m_pDevice->MemMgr()->Unmap(m_pGpuMemory);
}

m_pMappedPtr = nullptr;
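With this change the uploader maps and unmaps through InternalMemMgr instead of the GpuMemory object itself, so a buddy-allocated pool's CPU mapping can be reference-counted and, when maxMappedPoolsSize is set, reused by later uploads. A minimal sketch of the resulting calling pattern; pDevice, pGpuMemory, and baseOffset are stand-ins for the uploader's m_pDevice, m_pGpuMemory, and m_baseOffset members:

// Sketch only; not part of the diff.
void* pMapped = nullptr;
Result result = pDevice->MemMgr()->Map(pGpuMemory, &pMapped);
if (result == Result::Success)
{
    void* pDst = VoidPtrInc(pMapped, static_cast<size_t>(baseOffset));
    // ... write the pipeline sections through pDst ...
    pDevice->MemMgr()->Unmap(pGpuMemory); // Decrements the pool's refCount; the mapping
                                          // itself may stay cached for the next upload.
}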
144 changes: 144 additions & 0 deletions src/core/internalMemMgr.cpp
@@ -128,6 +128,7 @@ InternalMemMgr::InternalMemMgr(
:
m_pDevice(pDevice),
m_poolList(pDevice->GetPlatform()),
m_unusedMappedPoolList(pDevice->GetPlatform()),
m_references(pDevice->GetPlatform()),
m_referenceWatermark(0),
m_totalSizeMappedPools(0)
{
Expand All @@ -137,6 +138,24 @@ InternalMemMgr::InternalMemMgr(
// Explicitly frees all GPU memory allocations.
void InternalMemMgr::FreeAllocations()
{

for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
{
PAL_ASSERT((it.Get() != nullptr) && (it.Get()->pBuddyAllocator != nullptr));

if ((it.Get()->pData != nullptr) && (it.Get()->pGpuMemory != nullptr))
{
it.Get()->pGpuMemory->Unmap();
it.Get()->pData = nullptr;
}
}

while (m_unusedMappedPoolList.NumElements() > 0)
{
auto it = m_unusedMappedPoolList.Begin();
m_unusedMappedPoolList.Erase(&it);
}

// Delete the GPU memory objects using the references list
while (m_references.NumElements() != 0)
{
@@ -562,4 +581,129 @@ uint32 InternalMemMgr::GetReferencesCount()
return static_cast<uint32>(m_references.NumElements());
}

// =====================================================================================================================
// Maps the given GPU memory allocation for CPU access.
Result InternalMemMgr::Map(
GpuMemory* pGpuMemory,
void** ppData)
{
PAL_ASSERT(pGpuMemory != nullptr);
Result result = Result::ErrorInvalidValue;
if (pGpuMemory->WasBuddyAllocated())
{
Util::MutexAuto allocatorLock(&m_allocatorLock); // Ensure thread-safety using the lock
// Try to find the allocation in the pool list
for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
{
GpuMemoryPool* pPool = it.Get();

PAL_ASSERT((pPool->pGpuMemory != nullptr) && (pPool->pBuddyAllocator != nullptr));

if (pPool->pGpuMemory == pGpuMemory)
{
if (pPool->pData == nullptr)
{
result = pPool->pGpuMemory->Map(&pPool->pData);
if (result != Result::Success)
{
pPool->pData = nullptr;
break;
}
m_totalSizeMappedPools += pPool->pGpuMemory->Desc().size;
CheckMappedPoolLimit();
}
else if (pPool->refCount == 0)
{
// should be in unused list, remove it from there.
for (auto it2 = m_unusedMappedPoolList.Begin(); it2.Get() != nullptr; it2.Next())
{
if (*(it2.Get()) == pPool)
{
m_unusedMappedPoolList.Erase(&it2);
break;
}
}
}
pPool->refCount++;
*ppData = pPool->pData;
result = Result::Success;
break;
}
}

// If we didn't find the allocation in the pool list then something went wrong with the allocation scheme
PAL_ASSERT(result == Result::Success);
}
else
{
result = pGpuMemory->Map(ppData);
}

return result;
}

// =====================================================================================================================
// Unmaps the given GPU memory allocation from the CPU address space.
Result InternalMemMgr::Unmap(
GpuMemory* pGpuMemory)
{
PAL_ASSERT(pGpuMemory != nullptr);
if (pGpuMemory->WasBuddyAllocated())
{
Util::MutexAuto allocatorLock(&m_allocatorLock); // Ensure thread-safety using the lock
// Try to find the allocation in the pool list
for (auto it = m_poolList.Begin(); it.Get() != nullptr; it.Next())
{
GpuMemoryPool* pPool = it.Get();

PAL_ASSERT((pPool->pGpuMemory != nullptr) && (pPool->pBuddyAllocator != nullptr));
if (pPool->pGpuMemory == pGpuMemory)
{
if (pPool->pData != nullptr)
{
pPool->refCount--;
if (pPool->refCount == 0)
{
m_unusedMappedPoolList.PushBack(pPool);
CheckMappedPoolLimit();
}
}
break;
}
}
}
else
{
pGpuMemory->Unmap();
}

return Result::Success;
}

// =====================================================================================================================
// Checks the total size of mapped pools; if it exceeds the maximum limit, unmaps the least recently used pools.
void InternalMemMgr::CheckMappedPoolLimit()
{
if (m_pDevice->Settings().maxMappedPoolsSize >= 0)
{
while ((m_totalSizeMappedPools > m_pDevice->Settings().maxMappedPoolsSize)
&& (m_unusedMappedPoolList.NumElements() > 0))
{
auto it = m_unusedMappedPoolList.Begin();
GpuMemoryPool *pPool = *it.Get();

PAL_ASSERT(pPool->pBuddyAllocator != nullptr);
if ((pPool->pData != nullptr) && (pPool->pGpuMemory != nullptr))
{
pPool->pGpuMemory->Unmap();
pPool->pData = nullptr;
}
m_unusedMappedPoolList.Erase(&it);
PAL_ASSERT(m_totalSizeMappedPools >= pPool->pGpuMemory->Desc().size);
m_totalSizeMappedPools -= pPool->pGpuMemory->Desc().size;
}
}
}


} // Pal
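Taken together, Map() and Unmap() keep at most one CPU mapping per buddy-allocated pool and reference-count it; only pools whose refCount has dropped to zero are eviction candidates. A rough illustration of the lifetime, assuming two uploads whose suballocations share the same pool (pMemMgr and pPoolGpuMem are placeholders):

// Illustration only; not part of the diff.
void* pCpu0 = nullptr;
void* pCpu1 = nullptr;

pMemMgr->Map(pPoolGpuMem, &pCpu0);  // First Map() maps the pool; refCount = 1.
pMemMgr->Map(pPoolGpuMem, &pCpu1);  // Reuses pPool->pData; refCount = 2.

pMemMgr->Unmap(pPoolGpuMem);        // refCount = 1; mapping kept.
pMemMgr->Unmap(pPoolGpuMem);        // refCount = 0; the pool moves to the unused-mapped
                                    // list and CheckMappedPoolLimit() unmaps it only if
                                    // the total mapped size exceeds maxMappedPoolsSize
                                    // (with the default of 0 it is unmapped right away,
                                    // matching the old behavior).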
20 changes: 20 additions & 0 deletions src/core/internalMemMgr.h
@@ -56,6 +56,8 @@ struct GpuMemoryPool
uint64 pagingFenceVal; // Paging fence value

Util::BuddyAllocator<Platform>* pBuddyAllocator; // Buddy allocator used for the suballocation
void*  pData;    // CPU address of the pool's existing mapping (nullptr while the pool is unmapped)
size_t refCount; // Number of outstanding Map() calls that currently use this mapping
};

// =====================================================================================================================
Expand All @@ -77,6 +79,7 @@ class InternalMemMgr
typedef Util::ListIterator<GpuMemoryInfo, Platform> GpuMemoryListIterator;

typedef Util::List<GpuMemoryPool, Platform> GpuMemoryPoolList;
typedef Util::List<GpuMemoryPool*, Platform> GpuMemoryPoolRefList;

explicit InternalMemMgr(Device* pDevice);
~InternalMemMgr() { FreeAllocations(); }
@@ -115,6 +118,17 @@ class InternalMemMgr
// Number of all allocations in the reference list. Note that this function takes the reference list lock.
uint32 GetReferencesCount();

Result Map(
GpuMemory* pGpuMemory,
void** ppData);

Result Unmap(
GpuMemory* pGpuMemory);

// If the total size of mapped pools exceeds the maximum limit, unmaps the least recently used pools.
void CheckMappedPoolLimit();

private:
Result AllocateBaseGpuMem(
const GpuMemoryCreateInfo& createInfo,
@@ -133,6 +147,9 @@ class InternalMemMgr
// Maintain a list of GPU memory objects that are sub-allocated
GpuMemoryPoolList m_poolList;

// Maintain a list of GPU memory objects that are sub-allocated and mapped but unused
GpuMemoryPoolRefList m_unusedMappedPoolList;

// Maintain a list of internal GPU memory references
GpuMemoryList m_references;

@@ -142,6 +159,9 @@ class InternalMemMgr
// Ever-incrementing watermark to signal changes to the internal memory reference list
uint32 m_referenceWatermark;

// Total size of mapped pools
gpusize m_totalSizeMappedPools;

PAL_DISALLOW_COPY_AND_ASSIGN(InternalMemMgr);
PAL_DISALLOW_DEFAULT_CTOR(InternalMemMgr);
};
19 changes: 18 additions & 1 deletion src/core/settings_core.json
@@ -1945,6 +1945,23 @@
"VariableName": "useFp16GenMips",
"Description": "If mipGenUseFastPath == true and this is true - use the fp16 single-pass GenMips compute pass."
},
{
"Name": "maxMappedPoolsSize",
"Tags": [
"Resource Settings",
"Performance"
],
"Defaults": {
"Default": 0
},
"Flags": {
"RereadSetting": true
},
"Scope": "PrivatePalKey",
"Type": "gpusize",
"VariableName": "maxMappedPoolsSize",
"Description": "If maxMappedPoolsSize > 0 the mapped gpu memory for pipeline creation will not be unmapped. If the total size of mapped pools grows greater than maxMappedPoolsSize, then the least recently used pools will be unmapped."
},
{
"Name": "TmzEnabled",
"Tags": [
@@ -2129,4 +2146,4 @@
"Description": "Maximum string length for a miscellaneous string setting"
}
]
}
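Note that maxMappedPoolsSize is a byte limit (gpusize), not a pool count. A quick worked example of the eviction loop, using made-up sizes: with maxMappedPoolsSize set to 256 MiB and three unused mapped pools of 128 MiB each, the total of 384 MiB exceeds the limit, so the least recently used pool is unmapped and the total drops to 256 MiB; since 256 MiB no longer exceeds the limit, the remaining two pools stay mapped.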
Contributor:
Hi @samikhawaja could you tell me how you made this change in your editor? I can't tell the difference between the two lines.

Contributor:
That's just a trailing newline. No functional change.

Author:
The coding standard above doesn't say anything about the trailing newline. I can remove it if this is not ok.

Contributor:
This is ok, no need to change it any more :)