Skip to content

Commit f98db24

Browse files
committed
materialsystem: threaded optimizations, fix mat_queue_mode on some android devices
1 parent 3458c36 commit f98db24

File tree

8 files changed

+60
-140
lines changed

8 files changed

+60
-140
lines changed

materialsystem/cmaterialsystem.cpp

+11-34
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,7 @@ bool CMaterialSystem::AllowThreading( bool bAllow, int nServiceThread )
10291029

10301030
bool bOldAllow = m_bAllowQueuedRendering;
10311031

1032-
if ( GetCPUInformation()->m_nPhysicalProcessors >= 2 )
1032+
if ( GetCPUInformation()->m_nLogicalProcessors >= 2 )
10331033
{
10341034
m_bAllowQueuedRendering = bAllow;
10351035
bool bQueued = m_IdealThreadMode != MATERIAL_SINGLE_THREADED;
@@ -1806,11 +1806,7 @@ static ConVar mat_normalmaps( "mat_normalmaps", "0", FCVAR_CHEAT );
18061806
static ConVar mat_measurefillrate( "mat_measurefillrate", "0", FCVAR_CHEAT );
18071807
static ConVar mat_fillrate( "mat_fillrate", "0", FCVAR_CHEAT );
18081808
static ConVar mat_reversedepth( "mat_reversedepth", "0", FCVAR_CHEAT );
1809-
#ifdef DX_TO_GL_ABSTRACTION
1810-
static ConVar mat_bufferprimitives( "mat_bufferprimitives", "0" ); // I'm not seeing any benefit speed wise for buffered primitives on GLM/POSIX (checked via TF2 timedemo) - default to zero
1811-
#else
18121809
static ConVar mat_bufferprimitives( "mat_bufferprimitives", "1" );
1813-
#endif
18141810
static ConVar mat_drawflat( "mat_drawflat","0", FCVAR_CHEAT );
18151811
static ConVar mat_softwarelighting( "mat_softwarelighting", "0", FCVAR_ALLOWED_IN_COMPETITIVE );
18161812
static ConVar mat_proxy( "mat_proxy", "0", FCVAR_CHEAT, "", MatProxyCallback );
@@ -2780,8 +2776,8 @@ IMaterial* CMaterialSystem::FindMaterialEx( char const* pMaterialName, const cha
27802776
{
27812777
// We need lower-case symbols for this to work
27822778
int nLen = Q_strlen( pMaterialName ) + 1;
2783-
char *pFixedNameTemp = (char*)malloc( nLen );
2784-
char *pTemp = (char*)malloc( nLen );
2779+
char *pFixedNameTemp = (char*)stackalloc( nLen );
2780+
char *pTemp = (char*)stackalloc( nLen );
27852781
Q_strncpy( pFixedNameTemp, pMaterialName, nLen );
27862782
Q_strlower( pFixedNameTemp );
27872783
#ifdef POSIX
@@ -2883,9 +2879,6 @@ IMaterial* CMaterialSystem::FindMaterialEx( char const* pMaterialName, const cha
28832879
}
28842880
}
28852881

2886-
free(pTemp);
2887-
free(pFixedNameTemp);
2888-
28892882
return g_pErrorMaterial->GetRealTimeVersion();
28902883
}
28912884

@@ -3103,20 +3096,12 @@ void CMaterialSystem::ResetTempHWMemory( bool bExitingLevel )
31033096
//-----------------------------------------------------------------------------
31043097
void CMaterialSystem::CacheUsedMaterials( )
31053098
{
3099+
printf("Cache materials\n");
3100+
31063101
g_pShaderAPI->EvictManagedResources();
3107-
size_t count = 0;
3102+
31083103
for (MaterialHandle_t i = FirstMaterial(); i != InvalidMaterial(); i = NextMaterial(i) )
31093104
{
3110-
// Some (mac) drivers (amd) seem to keep extra resources around on uploads until the next frame swap. This
3111-
// injects pointless synthetic swaps (between already-static load frames)
3112-
if ( mat_texture_reload_frame_swap_workaround.GetBool() )
3113-
{
3114-
if ( count++ % 20 == 0 )
3115-
{
3116-
Flush(true);
3117-
SwapBuffers(); // Not the right thing to call
3118-
}
3119-
}
31203105
IMaterialInternal* pMat = GetMaterialInternal(i);
31213106
Assert( pMat->GetReferenceCount() >= 0 );
31223107
if( pMat->GetReferenceCount() > 0 )
@@ -3703,9 +3688,13 @@ void CMaterialSystem::EndFrame( void )
37033688
ThreadAcquire( true );
37043689
}
37053690

3691+
IThreadPool* pThreadPool = CreateMatQueueThreadPool();
3692+
37063693
if ( m_pActiveAsyncJob && !m_pActiveAsyncJob->IsFinished() )
37073694
{
3708-
m_pActiveAsyncJob->WaitForFinish();
3695+
m_pActiveAsyncJob->WaitForFinish(TT_INFINITE, pThreadPool);
3696+
3697+
// Sync with GPU if we had a job for it, even if it finished early on CPU!
37093698
if ( !IsPC() && g_config.ForceHWSync() )
37103699
{
37113700
g_pShaderAPI->ForceHardwareSync();
@@ -3730,7 +3719,6 @@ void CMaterialSystem::EndFrame( void )
37303719
}
37313720
}
37323721

3733-
IThreadPool *pThreadPool = CreateMatQueueThreadPool();
37343722
pThreadPool->AddJob( m_pActiveAsyncJob );
37353723
break;
37363724
}
@@ -4664,20 +4652,9 @@ void CMaterialSystem::BeginRenderTargetAllocation( void )
46644652

46654653
void CMaterialSystem::EndRenderTargetAllocation( void )
46664654
{
4667-
// Any GPU newer than 2005 doesn't need to do this, and it eats up ~40% of our level load time!
4668-
const bool cbRequiresRenderTargetAllocationFirst = mat_requires_rt_alloc_first.GetBool();
4669-
46704655
g_pShaderAPI->FlushBufferedPrimitives();
46714656
m_bAllocatingRenderTargets = false;
46724657

4673-
if ( IsPC() && cbRequiresRenderTargetAllocationFirst && g_pShaderAPI->CanDownloadTextures() )
4674-
{
4675-
// Simulate an Alt-Tab...will cause RTs to be allocated first
4676-
4677-
g_pShaderDevice->ReleaseResources();
4678-
g_pShaderDevice->ReacquireResources();
4679-
}
4680-
46814658
TextureManager()->CacheExternalStandardRenderTargets();
46824659
}
46834660

materialsystem/cmatqueuedrendercontext.cpp

+3-6
Original file line numberDiff line numberDiff line change
@@ -455,14 +455,11 @@ class CMatQueuedMesh : public IMesh
455455
}
456456
else
457457
{
458-
ALIGN16 uint16 tempIndices[16];
458+
static ALIGN16 uint16 tempIndices[256];
459459

460+
// original method
460461
int i = 0;
461-
if ( (size_t)desc.m_pIndices % 4 == 2 )
462-
{
463-
desc.m_pIndices[i] = pIndexData[i] + desc.m_nFirstVertex;
464-
i++;
465-
}
462+
466463
while ( i < nIndices )
467464
{
468465
int nToCopy = min( (int)ARRAYSIZE(tempIndices), nIndices - i );

materialsystem/ctexture.cpp

+1-14
Original file line numberDiff line numberDiff line change
@@ -2458,15 +2458,8 @@ bool CTexture::AsyncReadTextureFromFile( IVTFTexture* pVTFTexture, unsigned int
24582458
return false;
24592459
}
24602460

2461-
if ( V_strstr( GetName(), "c_sniperrifle_scope" ) )
2462-
{
2463-
int i = 0;
2464-
i = 3;
2465-
}
2466-
2467-
24682461
tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s - %s", __FUNCTION__, tmDynamicString( TELEMETRY_LEVEL0, pCacheFileName ) );
2469-
2462+
24702463
// OSX hackery
24712464
int nPreserveFlags = nAdditionalCreationFlags;
24722465
if ( m_nFlags & TEXTUREFLAGS_SRGB )
@@ -4189,12 +4182,6 @@ bool SLoadTextureBitsFromFile( IVTFTexture **ppOutVtfTexture, FileHandle_t hFile
41894182
// NOTE! NOTE! NOTE! or by the streaming texture code!
41904183
Assert( ppOutVtfTexture != NULL && *ppOutVtfTexture != NULL );
41914184

4192-
if ( V_strstr( pName, "c_rocketlauncher/c_rocketlauncher" ) )
4193-
{
4194-
int i = 0;
4195-
i = 3;
4196-
}
4197-
41984185
CUtlBuffer buf;
41994186

42004187
{

public/tier0/threadtools.h

+33-5
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,6 @@ PLATFORM_INTERFACE bool ReleaseThreadHandle( ThreadHandle_t );
194194

195195
//-----------------------------------------------------------------------------
196196

197-
PLATFORM_INTERFACE void ThreadSleep(unsigned duration = 0);
198-
PLATFORM_INTERFACE void ThreadNanoSleep(unsigned ns);
199197
PLATFORM_INTERFACE ThreadId_t ThreadGetCurrentId();
200198
PLATFORM_INTERFACE ThreadHandle_t ThreadGetCurrentHandle();
201199
PLATFORM_INTERFACE int ThreadGetPriority( ThreadHandle_t hThread = NULL );
@@ -229,10 +227,10 @@ inline void ThreadPause()
229227
{
230228
#if defined( COMPILER_PS3 )
231229
__db16cyc();
232-
#elif defined(__arm__) || defined(__aarch64__)
233-
sched_yield();
234-
#elif defined( COMPILER_GCC )
230+
#elif defined( COMPILER_GCC ) && (defined( __i386__ ) || defined( __x86_64__ ))
235231
__asm __volatile( "pause" );
232+
#elif defined( POSIX )
233+
sched_yield();
236234
#elif defined ( COMPILER_MSVC64 )
237235
_mm_pause();
238236
#elif defined( COMPILER_MSVC32 )
@@ -247,6 +245,36 @@ inline void ThreadPause()
247245
#endif
248246
}
249247

248+
inline void ThreadSleep(unsigned nMilliseconds = 0)
249+
{
250+
if( nMilliseconds == 0 )
251+
{
252+
ThreadPause();
253+
return;
254+
}
255+
256+
#ifdef _WIN32
257+
258+
#ifdef _WIN32_PC
259+
static bool bInitialized = false;
260+
if ( !bInitialized )
261+
{
262+
bInitialized = true;
263+
// Set the timer resolution to 1 ms (default is 10.0, 15.6, 2.5, 1.0 or
264+
// some other value depending on hardware and software) so that we can
265+
// use Sleep( 1 ) to avoid wasting CPU time without missing our frame
266+
// rate.
267+
timeBeginPeriod( 1 );
268+
}
269+
#endif
270+
::Sleep( nMilliseconds );
271+
#elif PS3
272+
sys_timer_usleep( nMilliseconds * 1000 );
273+
#elif defined(POSIX)
274+
usleep( nMilliseconds * 1000 );
275+
#endif
276+
}
277+
250278
PLATFORM_INTERFACE bool ThreadJoin( ThreadHandle_t, unsigned timeout = TT_INFINITE );
251279

252280
PLATFORM_INTERFACE void ThreadSetDebugName( ThreadHandle_t hThread, const char *pszName );

public/tier1/memhelpers.h

+5-11
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,15 @@ namespace memutils
1111
template<typename T>
1212
inline void copy( T *dest, const T *src, size_t n )
1313
{
14-
do
15-
{
16-
--n;
17-
*(dest+n) = *(src+n);
18-
} while( n );
14+
for(; n; n--)
15+
*(dest++) = *(src++);
1916
}
2017

2118
template<typename T>
22-
inline void set( T *dest, T value, size_t n )
19+
inline void set( T *dest, const T& value, size_t n )
2320
{
24-
do
25-
{
26-
--n;
27-
*(dest+n) = value;
28-
} while( n );
21+
for(; n; n--)
22+
*(dest++) = value;
2923
}
3024
}
3125

public/vstdlib/jobthread.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,8 @@ class CJob : public CRefCounted1<IRefCounted, CRefCountServiceMT>
492492
//-----------------------------------------------------
493493
// Thread event support (safe for NULL this to simplify code )
494494
//-----------------------------------------------------
495-
bool WaitForFinish( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; return ( !IsFinished() ) ? g_pThreadPool->YieldWait( this, dwTimeout ) : true; }
496-
bool WaitForFinishAndRelease( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; bool bResult = WaitForFinish( dwTimeout); Release(); return bResult; }
495+
inline bool WaitForFinish( uint32 dwTimeout = TT_INFINITE, IThreadPool *pool = g_pThreadPool ) { if (!this) return true; return ( !IsFinished() ) ? pool->YieldWait( this, dwTimeout ) : true; }
496+
inline bool WaitForFinishAndRelease( uint32 dwTimeout = TT_INFINITE ) { if (!this) return true; bool bResult = WaitForFinish( dwTimeout); Release(); return bResult; }
497497
CThreadEvent *AccessEvent() { return &m_CompleteEvent; }
498498

499499
//-----------------------------------------------------

tier0/threadtools.cpp

-53
Original file line numberDiff line numberDiff line change
@@ -485,59 +485,6 @@ bool ReleaseThreadHandle( ThreadHandle_t hThread )
485485
//
486486
//-----------------------------------------------------------------------------
487487

488-
void ThreadSleep(unsigned nMilliseconds)
489-
{
490-
#ifdef _WIN32
491-
492-
#ifdef _WIN32_PC
493-
static bool bInitialized = false;
494-
if ( !bInitialized )
495-
{
496-
bInitialized = true;
497-
// Set the timer resolution to 1 ms (default is 10.0, 15.6, 2.5, 1.0 or
498-
// some other value depending on hardware and software) so that we can
499-
// use Sleep( 1 ) to avoid wasting CPU time without missing our frame
500-
// rate.
501-
timeBeginPeriod( 1 );
502-
}
503-
#endif
504-
505-
Sleep( nMilliseconds );
506-
#elif PS3
507-
if( nMilliseconds == 0 )
508-
{
509-
// sys_ppu_thread_yield doesn't seem to function properly, so sleep instead.
510-
// sys_timer_usleep( 60 );
511-
sys_ppu_thread_yield();
512-
}
513-
else
514-
{
515-
sys_timer_usleep( nMilliseconds * 1000 );
516-
}
517-
#elif defined(POSIX)
518-
usleep( nMilliseconds * 1000 );
519-
#endif
520-
}
521-
522-
//-----------------------------------------------------------------------------
523-
void ThreadNanoSleep(unsigned ns)
524-
{
525-
#ifdef _WIN32
526-
// ceil
527-
Sleep( ( ns + 999 ) / 1000 );
528-
#elif PS3
529-
sys_timer_usleep( ns );
530-
#elif defined(POSIX)
531-
struct timespec tm;
532-
tm.tv_sec = 0;
533-
tm.tv_nsec = ns;
534-
nanosleep( &tm, NULL );
535-
#endif
536-
}
537-
538-
539-
//-----------------------------------------------------------------------------
540-
541488
#ifndef ThreadGetCurrentId
542489
ThreadId_t ThreadGetCurrentId()
543490
{

vstdlib/jobthread.cpp

+5-15
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,11 @@ class CThreadPool : public CRefCounted1<IThreadPool, CRefCountServiceMT>
214214
//-----------------------------------------------------
215215
virtual int YieldWait( CThreadEvent **pEvents, int nEvents, bool bWaitAll = true, unsigned timeout = TT_INFINITE );
216216
virtual int YieldWait( CJob **, int nJobs, bool bWaitAll = true, unsigned timeout = TT_INFINITE );
217-
void Yield( unsigned timeout );
217+
inline void Yield( unsigned timeout )
218+
{
219+
Assert( ThreadInMainThread() );
220+
ThreadSleep( timeout );
221+
}
218222

219223
//-----------------------------------------------------
220224
// Add a native job to the queue (master thread)
@@ -656,20 +660,6 @@ int CThreadPool::YieldWait( CJob **ppJobs, int nJobs, bool bWaitAll, unsigned ti
656660
return YieldWait( handles.Base(), handles.Count(), bWaitAll, timeout);
657661
}
658662

659-
//---------------------------------------------------------
660-
661-
void CThreadPool::Yield( unsigned timeout )
662-
{
663-
// @MULTICORE (toml 10/24/2006): not implemented
664-
Assert( ThreadInMainThread() );
665-
if ( !ThreadInMainThread() )
666-
{
667-
ThreadSleep( timeout );
668-
return;
669-
}
670-
ThreadSleep( timeout );
671-
}
672-
673663
//---------------------------------------------------------
674664
// Add a job to the queue
675665
//---------------------------------------------------------

0 commit comments

Comments
 (0)