Skip to content

Commit fa6b0b7

Browse files
committed
Merge branch 'internal_master_prm' into dev
2 parents 652389a + ad699ad commit fa6b0b7

File tree

134 files changed

+27512
-100977
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

134 files changed

+27512
-100977
lines changed

cmake/PalCompileDefinitions.cmake

+2-1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ function(pal_compile_definitions_gpu TARGET)
7979
if(PAL_BUILD_GFX9)
8080
pal_compile_definitions_gfx9(${TARGET})
8181
endif()
82+
8283
endif()
8384
endfunction()
8485

@@ -104,7 +105,7 @@ function(pal_compile_definitions TARGET)
104105
)
105106

106107
# If this build is part of a release branch, define the variable
107-
if (DEFINED PAL_BUILD_BRANCH)
108+
if (PAL_BUILD_BRANCH)
108109
target_compile_definitions(${TARGET} PRIVATE PAL_BUILD_BRANCH=${PAL_BUILD_BRANCH})
109110
endif()
110111

cmake/PalCompilerOptions.cmake

-11
Original file line numberDiff line numberDiff line change
@@ -79,17 +79,6 @@ function(pal_compiler_options TARGET)
7979
-fno-rtti
8080
>)
8181

82-
# Align the stack pointer on a 64 byte boundary (2^6) on compilers that support it.
83-
# This was added to resolve some issues after enabling SSE.
84-
# These options are specific to GCC and Clang but also may not exist on certain CPU archs.
85-
check_cxx_compiler_flag(-mpreferred-stack-boundary=6 HAS_STACK_BOUNDARY)
86-
check_cxx_compiler_flag(-mstack-alignment=64 HAS_STACK_ALIGNMENT)
87-
if (HAS_STACK_BOUNDARY)
88-
target_compile_options(${TARGET} PRIVATE -mpreferred-stack-boundary=6)
89-
elseif(HAS_STACK_ALIGNMENT)
90-
target_compile_options(${TARGET} PRIVATE -mstack-alignment=64)
91-
endif()
92-
9382
# If we're using a build type that generates debug syms, compress them to save significant disk space.
9483
check_cxx_compiler_flag(-gz HAS_COMPRESSED_DEBUG)
9584
if (HAS_COMPRESSED_DEBUG)

cmake/PalOptions.cmake

+5-4
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,11 @@ option(PAL_BUILD_DRI3 "Build PAL with DRI3 support?" ON)
5454
option(PAL_BUILD_WAYLAND "Build PAL with WAYLAND support?" OFF)
5555

5656
# Paths to PAL's dependencies
57-
set(PAL_METROHASH_PATH ${PROJECT_SOURCE_DIR}/src/util/imported/metrohash CACHE PATH "Specify the path to the MetroHash project.")
58-
set( PAL_CWPACK_PATH ${PROJECT_SOURCE_DIR}/src/util/imported/cwpack CACHE PATH "Specify the path to the CWPack project.")
59-
set( PAL_VAM_PATH ${PROJECT_SOURCE_DIR}/src/core/imported/vam CACHE PATH "Specify the path to the VAM project.")
60-
set( PAL_ADDR_PATH ${PROJECT_SOURCE_DIR}/src/core/imported/addrlib CACHE PATH "Specify the path to the ADDRLIB project.")
57+
set(PAL_METROHASH_PATH ${PROJECT_SOURCE_DIR}/src/util/imported/metrohash CACHE PATH "Specify the path to the MetroHash project.")
58+
set( PAL_CWPACK_PATH ${PROJECT_SOURCE_DIR}/src/util/imported/cwpack CACHE PATH "Specify the path to the CWPack project.")
59+
set( PAL_VAM_PATH ${PROJECT_SOURCE_DIR}/src/core/imported/vam CACHE PATH "Specify the path to the VAM project.")
60+
set( PAL_ADDR_PATH ${PROJECT_SOURCE_DIR}/src/core/imported/addrlib CACHE PATH "Specify the path to the ADDRLIB project.")
61+
set( PAL_SWD_PATH ${PROJECT_SOURCE_DIR}/src/core/imported/SwWarDetection CACHE PATH "Specify the path to the SwWarDetection project.")
6162

6263
set(PAL_GPUOPEN_PATH "default" CACHE PATH "Specify the path to the GPUOPEN_PATH project.")
6364

doc/process/palCodingStandards.md

+28
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,27 @@ documentation:
408408
***should*** use the capabilities of doxygen as appropriate with a
409409
focus on generating good-looking, useful documentation.
410410
411+
Use multi-line style Doxygen comments for file headers and class declarations.
412+
```cpp
413+
/**
414+
***********************************************************************************************************************
415+
* @brief Summary of what the following body of code does.
416+
*
417+
* A more detailed description goes here.
418+
***********************************************************************************************************************
419+
*/
420+
```
421+
422+
Use triple-slash style Doxygen comments for functions, methods, enums, structs, and unions.
423+
```cpp
424+
/// @brief Summary of what the following body of code does.
425+
///
426+
/// @param [in] parameterName0 Description of this input parameter.
427+
/// @param [out] parameterName1 Description of this output parameter.
428+
///
429+
/// @returns Omit if this isn't a function/method, or it's a void one.
430+
```
431+
411432
### Internal code
412433

413434
Internal PAL code (anything not in inc/, and private members of classes
@@ -485,6 +506,13 @@ class Platform
485506
synchronization assumptions made by a class or function (i.e., this
486507
function should *never* be called from multiple threads, etc.).
487508

509+
#### Motivation
510+
The PAL team decided on this unique commenting style universally. We used to work on drivers that required full Doxygen comments in every file. It was a poor experience because:
511+
512+
- If we're going to use Doxygen, we want to do it right. That means exhaustively commenting every function parameter and every member variable. This is a huge waste of time when almost all member variables and function parameters are trivially understandable based on their name alone. Otherwise, a simple informal block comment above the variables or above the function gets the point across.
513+
- Doxygen is meant to generate docs, and as such the formatting is rather tedious and complex if you're never going to generate those docs. The only thing worth formally documenting in PAL is the interface; everything else is documented on a "read the code" basis. Thus we're much better served with a very plain style that just gets the point across internally.
514+
- The only good thing about Doxygen is the generated docs. We assume client driver engineers would generate Doxygen docs to help them read up on our interface headers. This is the only reason we still use Doxygen for the public interface.
515+
488516
<!--
489517
-->
490518
Types and Declarations

inc/core/palCmdBuffer.h

+46-10
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ enum class PrimitiveTopology : uint32
8686
LineStrip = 0x2,
8787
TriangleList = 0x3,
8888
TriangleStrip = 0x4,
89-
RectList = 0x5,
89+
RectList = 0x5, ///< Each rect is three 2D axis-aligned rectangle vertices.
9090
QuadList = 0x6,
9191
QuadStrip = 0x7,
9292
LineListAdj = 0x8,
@@ -96,7 +96,9 @@ enum class PrimitiveTopology : uint32
9696
Patch = 0xC,
9797
TriangleFan = 0xD,
9898
LineLoop = 0xE,
99-
Polygon = 0xF
99+
Polygon = 0xF,
100+
TwoDRectList = 0x10 ///< Each rect is the bounding box of an arbitrary 2D triangle.
101+
/// Support is optional, see support2DRectList in DeviceProperties.
100102
};
101103

102104
/// Specifies how triangle primitives should be rasterized.
@@ -1424,6 +1426,16 @@ typedef void (PAL_STDCALL *CmdDispatchOffsetFunc)(
14241426
uint32 yDim,
14251427
uint32 zDim);
14261428

1429+
/// @internal Function pointer type definition for issuing dynamic compute dispatches.
1430+
///
1431+
/// @see ICmdBuffer::CmdDispatchDynamic().
1432+
typedef void (PAL_STDCALL* CmdDispatchDynamicFunc)(
1433+
ICmdBuffer* pCmdBuffer,
1434+
gpusize gpuVa,
1435+
uint32 xDim,
1436+
uint32 yDim,
1437+
uint32 zDim);
1438+
14271439
/// @internal Function pointer type definition for issuing direct mesh dispatches.
14281440
///
14291441
/// @see ICmdBuffer::CmdDispatchMesh().
@@ -1786,13 +1798,14 @@ struct CmdBufInfo
17861798
{
17871799
struct
17881800
{
1789-
uint32 isValid : 1; ///< Indicate if this CmdBufInfo is valid and should be submitted
1790-
uint32 frameBegin : 1; ///< First command buffer after Queue creation or Present.
1791-
uint32 frameEnd : 1; ///< Last command buffer before Present.
1792-
uint32 p2pCmd : 1; ///< Is P2P copy command. See CmdBufInfo comments for details.
1793-
uint32 captureBegin : 1; ///< This command buffer begins a Direct Capture frame capture.
1794-
uint32 captureEnd : 1; ///< This command buffer ends a Direct Capture frame capture.
1795-
uint32 reserved : 26; ///< Reserved for future usage.
1801+
uint32 isValid : 1; ///< Indicate if this CmdBufInfo is valid and should be submitted
1802+
uint32 frameBegin : 1; ///< First command buffer after Queue creation or Present.
1803+
uint32 frameEnd : 1; ///< Last command buffer before Present.
1804+
uint32 p2pCmd : 1; ///< Is P2P copy command. See CmdBufInfo comments for details.
1805+
uint32 captureBegin : 1; ///< This command buffer begins a Direct Capture frame capture.
1806+
uint32 captureEnd : 1; ///< This command buffer ends a Direct Capture frame capture.
1807+
uint32 rayTracingExecuted : 1; ///< This command buffer contains ray tracing work.
1808+
uint32 reserved : 25; ///< Reserved for future usage.
17961809
};
17971810
uint32 u32All; ///< Flags packed as uint32.
17981811
};
@@ -2302,7 +2315,7 @@ class ICmdBuffer : public IDestroyable
23022315
///
23032316
/// @param [in] rasterizerDiscardEnable Parameters for dynamically setting rasterizer discard enable bit
23042317
virtual void CmdSetRasterizerDiscardEnable(
2305-
bool rasterizerDiscardEnable) = 0;
2318+
bool rasterizerDiscardEnable) = 0;
23062319

23072320
/// Inserts a barrier in the current command stream that can stall GPU execution, flush/invalidate caches, or
23082321
/// decompress images before further, dependent work can continue in this command buffer.
@@ -2722,6 +2735,28 @@ class ICmdBuffer : public IDestroyable
27222735
m_funcTable.pfnCmdDispatchOffset(this, xOffset, yOffset, zOffset, xDim, yDim, zDim);
27232736
}
27242737

2738+
/// Dispatches a compute workload of the given dimensions using the command buffer's currently bound compute state
2739+
/// and dynamic pipeline state from GPU memory. The memory address provided contains the gpuVa of the pipeline
2740+
/// launch descriptor previously obtained by calling @ref IPipeline::CreateLaunchDescriptor on a pipeline
2741+
/// that supports dynamic dispatch (@see PipelineCreateFlags)
2742+
///
2743+
/// The thread group size is defined in the compute shader.
2744+
///
2745+
/// @note DynamicComputeShaderInfo.ldsBytesPerTg is not applicable to dynamic launch descriptors.
2746+
///
2747+
/// @param [in] gpuVa GPU virtual address of memory containing pipeline launch descriptor address.
2748+
/// @param [in] xDim Thread groups to dispatch in the X dimension. If zero, the dispatch will be discarded.
2749+
/// @param [in] yDim Thread groups to dispatch in the Y dimension. If zero, the dispatch will be discarded.
2750+
/// @param [in] zDim Thread groups to dispatch in the Z dimension. If zero, the dispatch will be discarded.
2751+
PAL_INLINE void CmdDispatchDynamic(
2752+
gpusize gpuVa,
2753+
uint32 xDim,
2754+
uint32 yDim,
2755+
uint32 zDim)
2756+
{
2757+
m_funcTable.pfnCmdDispatchDynamic(this, gpuVa, xDim, yDim, zDim);
2758+
}
2759+
27252760
/// Dispatches a mesh shader workload using the command buffer's currently bound graphics state. It is an error if
27262761
/// the currently bound graphics pipeline does not contain a mesh and/or task shader.
27272762
///
@@ -4048,6 +4083,7 @@ class ICmdBuffer : public IDestroyable
40484083
CmdDispatchFunc pfnCmdDispatch; ///< CmdDispatch function pointer.
40494084
CmdDispatchIndirectFunc pfnCmdDispatchIndirect; ///< CmdDispatchIndirect function pointer.
40504085
CmdDispatchOffsetFunc pfnCmdDispatchOffset; ///< CmdDispatchOffset function pointer.
4086+
CmdDispatchDynamicFunc pfnCmdDispatchDynamic; ///< CmdDispatchDynamic function pointer.
40514087
CmdDispatchMeshFunc pfnCmdDispatchMesh; ///< CmdDispatchMesh function pointer.
40524088
CmdDispatchMeshIndirectMultiFunc pfnCmdDispatchMeshIndirectMulti; ///< CmdDispatchMeshIndirectMulti function pointer.
40534089
} m_funcTable; ///< Function pointer table for Cmd* functions.

inc/core/palDeveloperHooks.h

+1
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ enum class DrawDispatchType : uint32
389389
CmdDispatchAce, ///< Direct Compute dispatch through implicit ganged-submit ACE stream.
390390
CmdDispatchIndirect, ///< Indirect compute dispatch
391391
CmdDispatchOffset, ///< Direct compute dispatch (offsetted start)
392+
CmdDispatchDynamic, ///< Dynamic compute dispatch
392393

393394
Count,
394395
FirstDispatch = CmdDispatch ///< All callbacks with an enum value greater than or equal to this are dispatches

inc/core/palDevice.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,9 @@ struct DeviceProperties
11221122
uint32 maxGsOutputVert; ///< Maximum number of GS output vertices.
11231123
uint32 maxGsTotalOutputComponents; ///< Maximum number of GS output components totally.
11241124

1125+
uint32 dynamicLaunchDescSize; ///< Dynamic launch descriptor size. Zero indicates this feature is not
1126+
/// supported. @ref IPipeline::CreateLaunchDescriptor()
1127+
11251128
RayTracingIpLevel rayTracingIp; ///< HW RayTracing IP version
11261129

11271130
union
@@ -1198,7 +1201,8 @@ struct DeviceProperties
11981201
uint64 supportTextureGatherBiasLod : 1; ///< HW supports SQ_IMAGE_GATHER4_L_O
11991202
uint64 supportInt8Dot : 1; ///< Hardware supports a dot product 8bit.
12001203
uint64 supportInt4Dot : 1; ///< Hardware supports a dot product 4bit.
1201-
uint64 reserved : 21; ///< Reserved for future use.
1204+
uint64 support2DRectList : 1; ///< HW supports PrimitiveTopology::TwoDRectList.
1205+
uint64 reserved : 20; ///< Reserved for future use.
12021206
};
12031207
uint64 u64All; ///< Flags packed as 64-bit uint.
12041208
} flags; ///< Device IP property flags.

inc/core/palLib.h

+25-3
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
/// compatible, it is not assumed that the client will initialize all input structs to 0.
4444
///
4545
/// @ingroup LibInit
46-
#define PAL_INTERFACE_MAJOR_VERSION 669
46+
#define PAL_INTERFACE_MAJOR_VERSION 673
4747

4848
/// Minor interface version. Note that the interface version is distinct from the PAL version itself, which is returned
4949
/// in @ref Pal::PlatformProperties.
@@ -53,7 +53,7 @@
5353
/// of the existing enum values will change. This number will be reset to 0 when the major version is incremented.
5454
///
5555
/// @ingroup LibInit
56-
#define PAL_INTERFACE_MINOR_VERSION 1
56+
#define PAL_INTERFACE_MINOR_VERSION 0
5757

5858
/// Minimum major interface version. This is the minimum interface version PAL supports in order to support backward
5959
/// compatibility. When it is equal to PAL_INTERFACE_MAJOR_VERSION, only the latest interface version is supported.
@@ -90,6 +90,27 @@ class IPlatform;
9090
/// This is a list of GPUs that the NULL OS layer can compile shaders to in offline mode.
9191
enum class NullGpuId : uint32
9292
{
93+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 672
94+
Polaris10 = 0x00,
95+
Polaris11 = 0x01,
96+
Polaris12 = 0x02,
97+
98+
Vega10 = 0x04,
99+
Raven = 0x05,
100+
Vega12 = 0x06,
101+
Vega20 = 0x07,
102+
Raven2 = 0x08,
103+
Renoir = 0x09,
104+
105+
Navi10 = 0x0A,
106+
Navi12 = 0x0B,
107+
Navi14 = 0x0D,
108+
Navi21 = 0x0F,
109+
Navi22 = 0x10,
110+
111+
Max = 0x1B,
112+
All = 0x1C,
113+
#else // PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 672
93114
Tahiti = 0x00,
94115
Pitcairn = 0x01,
95116
Capeverde = 0x02,
@@ -113,7 +134,7 @@ enum class NullGpuId : uint32
113134
Polaris10 = 0x12,
114135
Polaris11 = 0x13,
115136
Polaris12 = 0x14,
116-
Stoney = 0x16,
137+
Stoney = 0x16,
117138

118139
Vega10 = 0x17,
119140
Raven = 0x18,
@@ -130,6 +151,7 @@ enum class NullGpuId : uint32
130151

131152
Max = 0x2E,
132153
All = 0x2F,
154+
#endif
133155
};
134156

135157
/// Maps a null GPU ID to its associated text name.

inc/core/palPipeline.h

+29-4
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,18 @@ union PipelineCreateFlags
154154
{
155155
struct
156156
{
157-
uint32 clientInternal : 1; ///< Internal pipeline not created by the application.
157+
uint32 clientInternal : 1; ///< Internal pipeline not created by the application.
158158
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 631
159-
uint32 overrideGpuHeap : 1; ///< Override the default GPU heap (local invisible) the pipeline resides in.
160-
uint32 reserved : 30; ///< Reserved for future use.
159+
uint32 overrideGpuHeap : 1; ///< Override the default GPU heap (local invisible) the pipeline
160+
/// resides in.
161+
uint32 reserved : 30; ///< Reserved for future use.
162+
#elif PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 673
163+
uint32 supportDynamicDispatch : 1; ///< Pipeline will be used with @ref ICmdBuffer::CmdDispatchDynamic.
164+
/// This flag must only be set if the device reports support
165+
/// via DeviceProperties.
166+
uint32 reserved : 30; ///< Reserved for future use.
161167
#else
162-
uint32 reserved : 31; ///< Reserved for future use.
168+
uint32 reserved : 31; ///< Reserved for future use.
163169
#endif
164170
};
165171
uint32 u32All; ///< Flags packed as 32-bit uint.
@@ -549,6 +555,25 @@ class IPipeline : public IDestroyable
549555
size_t* pSize,
550556
void* pBuffer) = 0;
551557

558+
/// Creates a new dynamic launch descriptor for this pipeline. These descriptors are only usable as input to
559+
/// @ref ICmdBuffer::CmdDispatchDynamic(). Each launch descriptor acts as a GPU-side "handle" to a pipeline and
560+
/// a set of shader libraries it is linked with. The size of the launch descriptor can be queried from
561+
/// @ref DeviceProperties. A size of 0 reported in DeviceProperties indicates that this feature is not supported.
562+
///
563+
/// Currently only supported on compute pipelines.
564+
///
565+
/// @param [in, out] pCpuAddr Launch descriptor to create or update. Must not be null.
566+
/// @param [in] resolve The launch descriptor contains state from a previous link operation. Need to update
567+
/// the descriptor during this operation.
568+
///
569+
/// @returns Success if the operation was successful. Other error codes may include:
570+
/// + ErrorUnavailable if called on a graphics pipeline or a pipeline that does not support dynamic
571+
/// launch. @ref PipelineCreateFlags
572+
/// + ErrorInvalidPointer if pCpuAddr is null.
573+
virtual Result CreateLaunchDescriptor(
574+
void* pCpuAddr,
575+
bool resolve) = 0;
576+
552577
/// Notifies PAL that this pipeline may make indirect function calls to any function contained within any of the
553578
/// specified @ref IShaderLibrary objects. This gives PAL a chance to perform any late linking steps required to
554579
/// valid execution of the possible function calls (this could include adjusting hardware resources such as GPRs

0 commit comments

Comments
 (0)