-
Notifications
You must be signed in to change notification settings - Fork 89
/
palDevice.h
5595 lines (5109 loc) · 320 KB
/
palDevice.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2024 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palDevice.h
* @brief Defines the Platform Abstraction Library (PAL) IDevice interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "palCmdAllocator.h"
#include "palDestroyable.h"
#include "palFence.h"
#include "palFile.h"
#include "palGpuMemory.h"
#include "palImage.h"
#include "palInlineFuncs.h"
#include "palLib.h"
#include "palPerfExperiment.h"
#include "palPipeline.h"
#include "palQueue.h"
#include <chrono>
namespace Util
{
class Event;
}
namespace Pal
{
// Forward declarations.
class IBorderColorPalette;
class ICmdAllocator;
class ICmdBuffer;
class IColorBlendState;
class IColorTargetView;
class IDepthStencilState;
class IDepthStencilView;
class IDevice;
class IFence;
class IGpuEvent;
class IGpuMemory;
class IImage;
class IIndirectCmdGenerator;
class IMsaaState;
class IPerfExperiment;
class IPipeline;
class IPrivateScreen;
class IQueryPool;
class IQueue;
class IQueueSemaphore;
class IShaderLibrary;
class ISwapChain;
struct BorderColorPaletteCreateInfo;
struct CmdAllocatorCreateInfo;
struct CmdBufferCreateInfo;
struct ColorBlendStateCreateInfo;
struct ColorTargetViewCreateInfo;
struct ComputePipelineCreateInfo;
struct DepthStencilStateCreateInfo;
struct DepthStencilViewCreateInfo;
struct ExternalImageOpenInfo;
struct ExternalGpuMemoryOpenInfo;
struct ExternalQueueSemaphoreOpenInfo;
struct ExternalResourceOpenInfo;
struct GpuEventCreateInfo;
struct GpuMemoryCreateInfo;
struct GpuMemoryOpenInfo;
struct GpuMemoryRef;
struct GraphicsPipelineCreateInfo;
struct ImageCreateInfo;
struct IndirectCmdGeneratorCreateInfo;
struct MsaaStateCreateInfo;
struct MsaaQuadSamplePattern;
struct PeerGpuMemoryOpenInfo;
struct PeerImageOpenInfo;
struct PerfExperimentCreateInfo;
struct PinnedGpuMemoryCreateInfo;
struct PresentableImageCreateInfo;
struct PrivateScreenCreateInfo;
struct PrivateScreenNotifyInfo;
struct QueryPoolCreateInfo;
struct QueueCreateInfo;
struct QueueSemaphoreCreateInfo;
struct QueueSemaphoreOpenInfo;
struct ShaderLibraryCreateInfo;
struct SwapChainCreateInfo;
struct SwapChainProperties;
struct SvmGpuMemoryCreateInfo;
struct GraphicPipelineViewInstancingInfo;
// Opaque enum declarations, so these types can be named before their full definitions are seen.
// WsiPlatform is declared as a scoped enum (enum class) for client interface versions 881 and newer, and as an
// unscoped enum for older interface versions.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 881
enum class WsiPlatform : uint32;
#else
enum WsiPlatform : uint32;
#endif
enum class PipelineBindPoint : uint32;
// VaRange is fully defined later in this header.
enum class VaRange : uint32;
/// Maximum string length for GPU names. @see DeviceProperties.
constexpr uint32 MaxDeviceName = 256;
/// Maximum number of indirect user-data tables managed by PAL's command buffer objects. @see DeviceFinalizeInfo.
constexpr uint32 MaxIndirectUserDataTables = 1;
/// Maximum number of supported entries in the MSAA sample pattern palette. See IDevice::SetSamplePatternPalette().
constexpr uint32 MaxSamplePatternPaletteEntries = 16;
/// Maximum number of shader engines (SEs) supported in the GPU. This is an upper bound that can be much larger than
/// the actual value on a given GPU, but is useful for sizing arrays.
constexpr uint32 MaxShaderEngines = 32;
/// Maximum number of supported subunits each Shader Engine splits into (SH or SA, depending on generation).
constexpr uint32 MaxShaderArraysPerSe = 2;
/// Size of the Active Pixel Packer Mask in DWORDs.
/// NOTE(review): 4 DWORDs is 128 bits, which equals MaxShaderEngines * MaxPixelPackerPerSe; presumably one bit per
/// pixel packer - confirm before changing any of these three constants independently.
constexpr uint32 ActivePixelPackerMaskDwords = 4;
/// Maximum number of pixel packers per SE expected by PAL.
constexpr uint32 MaxPixelPackerPerSe = 4;
/// Host-side flags that modify how a wait on an array of semaphores/fences behaves.
enum HostWaitFlags : uint32
{
    HostWaitAny = 0x1,  ///< When this bit is set, the wait returns as soon as any single semaphore/fence in the
                        ///  array has completed.  When it is not set, the wait returns only after all of the
                        ///  semaphores/fences in the array have completed.
};
/// Identifies the kind of GPU a particular IDevice represents (e.g., discrete vs. integrated).
enum class GpuType : uint32
{
    Unknown    = 0x0,  ///< The type of this GPU could not be determined.
    Integrated = 0x1,  ///< Integrated GPU (i.e., APU).
    Discrete   = 0x2,  ///< Discrete GPU.
    Virtual    = 0x3,  ///< Virtualized GPU.
    Count              ///< Number of GPU types listed above; not a GPU type itself.
};
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
/// Identifies the operating-system-support IP (OSSIP) level of this device.
enum class OssIpLevel : uint32
{
    _None    = 0x0,    ///< @internal No OSSIP block is present on the device, or its level cannot be determined.

    // X.h, which Linux clients may include, contains a "#define None 0" macro that collides with an enumerator
    // named None.  Such clients can either #undef None before including this header or use _None with PAL.
#ifndef None
    None     = _None,  ///< No OSSIP block is present on the device, or its level cannot be determined.
#endif

#if PAL_BUILD_OSS2_4
    OssIp2_4 = 0x3,
#endif
    OssIp4   = 0x4,
};
/// Identifies the VCE IP level of this device.
enum class VceIpLevel : uint32
{
    _None    = 0x0,    ///< @internal No VCEIP block is present on the device, or its level cannot be determined.

    // X.h, which Linux clients may include, contains a "#define None 0" macro that collides with an enumerator
    // named None.  Such clients can either #undef None before including this header or use _None with PAL.
#ifndef None
    None     = _None,  ///< No VCEIP block is present on the device, or its level cannot be determined.
#endif

    VceIp1   = 0x1,
    VceIp2   = 0x2,
    VceIp3   = 0x3,
    VceIp3_1 = 0x4,
    VceIp3_4 = 0x5,
    VceIp4   = 0x6,
};
/// Specifies which UVD IP level this device has.
enum class UvdIpLevel : uint32
{
_None = 0x0, ///< @internal The device does not have a UVDIP block, or its level cannot be determined
// Unfortunately for Linux clients, X.h includes a "#define None 0" macro. Clients have their choice of either
// undefining None before including this header or using _None when dealing with PAL.
#ifndef None
None = _None, ///< The device does not have a UVDIP block, or its level cannot be determined
#endif
UvdIp3_2 = 0x1,
// NOTE(review): UvdIp4 and UvdIp4_2 below share the value 0x2, so the two levels cannot be distinguished by value.
// Every other enumerator in this enum is unique - confirm the aliasing is intentional before relying on it.
UvdIp4 = 0x2,
UvdIp4_2 = 0x2,
UvdIp5 = 0x3,
UvdIp6 = 0x4,
UvdIp6_2 = 0x5,
UvdIp6_3 = 0x6,
UvdIp7 = 0x7,
UvdIp7_2 = 0x8,
};
#endif
/// Identifies the VCN IP level of this device.
enum class VcnIpLevel : uint32
{
    _None  = 0x0,    ///< @internal No VCNIP block is present on the device, or its level cannot be determined.

    // X.h, which Linux clients may include, contains a "#define None 0" macro that collides with an enumerator
    // named None.  Such clients can either #undef None before including this header or use _None with PAL.
#ifndef None
    None   = _None,  ///< No VCNIP block is present on the device, or its level cannot be determined.
#endif

    // VcnIp1 is only declared for client interface versions older than 888.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
    VcnIp1 = 0x1,
#endif
};
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
/// Identifies the SPU IP level of this device.
enum class SpuIpLevel : uint32
{
    _None = 0x0,    ///< @internal No SPUIP block is present on the device, or its level cannot be determined.

    // None is skipped when a macro of that name is already defined; _None is always available.
#ifndef None
    None  = _None,  ///< No SPUIP block is present on the device, or its level cannot be determined.
#endif

    SpuIp = 0x1,
};
#endif
/// Identifies the PSP IP level of this device.
enum class PspIpLevel : uint32
{
    _None   = 0x0,    ///< @internal No PSPIP block is present on the device, or its level cannot be determined.

    // None is skipped when a macro of that name is already defined; _None is always available.
#ifndef None
    None    = _None,  ///< No PSPIP block is present on the device, or its level cannot be determined.
#endif

    PspIp10 = 0x1,
};
/// Specifies a video decode type.
enum class VideoDecodeType : uint32
{
    H264      = 0x0,  ///< H264 VLD
    Vc1       = 0x1,  ///< VC1 VLD
    Mpeg2Idct = 0x2,  ///< Partial MPEG2 decode (IT+MP)
    Mpeg2Vld  = 0x3,  ///< Full MPEG2 decode (RE+IT+MP+DB)
    Mpeg4     = 0x4,  ///< MPEG4
    Wmv9      = 0x5,  ///< WMV9 IDCT
    Mjpeg     = 0x6,  ///< Motion JPEG
    Hevc      = 0x7,  ///< HEVC
    Vp9       = 0x8,  ///< VP9
    Hevc10Bit = 0x9,  ///< HEVC 10bit
    Vp910Bit  = 0xa,  ///< VP9 10bit
    Av1       = 0xb,  ///< AV1 8/10bit
    Av112Bit  = 0xc,  ///< AV1 12bit
    Count,            ///< Number of decode types listed above; not a decode type itself.
};
/// Selects the video CODEC used for encoding.
enum class VideoEncodeCodec : uint32
{
    H264 = 0x0,  ///< H.264
    H265 = 0x1,  ///< H.265
    Av1  = 0x2,  ///< AV1
    Count        ///< Number of codecs listed above; not a codec itself.
};
/// Identifies the virtual address (VA) range in which a memory allocation should be placed.
enum class VaRange : uint32
{
    Default,                ///< The default VA range.  Choose this for most allocations.
    DescriptorTable,        ///< Allocate from a 4GB VA range that PAL reserves for descriptor tables.  Because the
                            ///  allocation is known to live in this range, a descriptor table can be specified
                            ///  with only one user data entry.  @see ResourceMappingNodeType.
    ShadowDescriptorTable,  ///< Allocate from a 4GB VA range that PAL reserves for "shadow" descriptor tables.  A
                            ///  shadow descriptor table is an additional table laid out the same as its parent
                            ///  descriptor table; it can hold infrequently needed data such as fmask SRDs or UAV
                            ///  counter data.  This lets the client and SC agree on a known location for such
                            ///  data without wasting a user data entry or half of every descriptor cache line.
                            ///  Only supported if DeviceProperties::gpuMemoryProperties::flags::shadowDescVaSupport
                            ///  is set.
    Svm,                    ///< Allocate from a VA range that PAL reserves for shared virtual memory (SVM).  This
                            ///  GPU VA range is also reserved on the CPU side.  The PAL client sets the size of
                            ///  the reserved VA by calling CreatePlatform.
    CaptureReplay,          ///< Allocate from a VA range reserved for capture and playback.
    Count,                  ///< Number of VA ranges listed above; not a VA range itself.
};
/// Enumerates the TMZ (trusted memory zone) support levels.
enum class TmzSupportLevel : uint32
{
    _None         = 0,      ///< @internal TMZ not supported.

    // X.h, which Linux clients may include, contains a "#define None 0" macro.  Without this guard the None
    // enumerator would expand to a literal and this header would fail to compile after X.h.  Clients have their
    // choice of either undefining None before including this header or using _None when dealing with PAL.
#ifndef None
    None          = _None,  ///< TMZ not supported.
#endif

    PerQueue      = 1,      ///< Enable TMZ mode per queue.
    PerSubmission = 2,      ///< Enable TMZ mode per submission.
    PerCommandOp  = 3       ///< Enable TMZ mode per command operation.
};
/// Describes how one bit of a swizzle equation is to be interpreted.
union SwizzleEquationBit
{
    struct
    {
        uint8 valid   : 1;  ///< Set when this channel setting is valid.
        uint8 channel : 2;  ///< Channel selector: 0 for x, 1 for y, 2 for z.
        uint8 index   : 5;  ///< The channel index.
    };

    uint8 u8All;            ///< All of the fields above packed into one 8-bit uint.
};
constexpr uint32 SwizzleEquationMaxBits = 20; ///< Swizzle equations will consider no more than this many bits.
constexpr uint8 InvalidSwizzleEqIndex = 0xFF; ///< Indicates an invalid swizzle equation index in the equation table.
constexpr uint8 LinearSwizzleEqIndex = 0xFE; ///< Not a valid equation index; it indicates a row-major, linear memory
                                             ///  layout.
/// Bitfield of texture fetch meta-data capabilities, used with the tcCompatibleMetaData setting.
enum TexFetchMetaDataCaps : uint32
{
    TexFetchMetaDataCapsNoAaColor    = (1u << 0),  ///< 0x00000001: single-sample color surface.
    TexFetchMetaDataCapsMsaaColor    = (1u << 1),  ///< 0x00000002: MSAA color surface.
    TexFetchMetaDataCapsFmask        = (1u << 2),  ///< 0x00000004: FMask data.
    TexFetchMetaDataCapsNoAaDepth    = (1u << 3),  ///< 0x00000008: single-sample depth surface.
    TexFetchMetaDataCapsMsaaDepth    = (1u << 4),  ///< 0x00000010: MSAA depth surface.
    TexFetchMetaDataCapsAllowStencil = (1u << 5),  ///< 0x00000020: allow stencil.
    TexFetchMetaDataCapsAllowZ16     = (1u << 6),  ///< 0x00000040: allow Z-16 surfaces.
};
/// Values for the Catalyst AI setting.
enum CatalystAiSettings : uint32
{
    CatalystAiDisable = 0,
    CatalystAiEnable  = 1,
    CatalystAiMaximum = 2,
};
/// Values for the texture filter optimization setting.
enum TextureFilterOptimizationSettings : uint32
{
    TextureFilterOptimizationsDisabled   = 0,
    TextureFilterOptimizationsEnabled    = 1,
    TextureFilterOptimizationsAggressive = 2,
};
/// Values for the tessellation distribution mode.  @see PalPublicSettings::distributionTessMode, whose description
/// lists the same 0-5 numbering.
enum DistributionTessMode : uint32
{
    DistributionTessOff           = 0,  ///< No distribution across VGTs (legacy mode).
    DistributionTessDefault       = 1,  ///< Optimal settings are chosen depending on the gfxip.
    DistributionTessPatch         = 2,  ///< Individual patches are distributed to different VGTs.
    DistributionTessDonut         = 3,  ///< Patches are split into donuts and distributed to different VGTs.
    DistributionTessTrapezoid     = 4,  ///< Patches from donuts are split into trapezoids and distributed to
                                        ///  different VGTs.  Falls back to donut mode if HW does not support this.
    DistributionTessTrapezoidOnly = 5,  ///< Trapezoid mode; distribution is turned off if HW does not support it.
};
/// Flags selecting context roll optimizations.
enum ContextRollOptimizationFlags : uint32
{
    OptFlagNone        = 0x00000000,  ///< No optimization flag is set.
    PadParamCacheSpace = 0x00000001,  ///< Pad parameter cache space.
};
/// Selects the value that DCC metadata is initialized to.
enum class DccInitialClearKind
{
    Uncompressed     = 0x0,
    OpaqueBlack      = 0x1,
    OpaqueWhite      = 0x2,
    ForceBit         = 0x10,                       ///< Bit that is OR'ed in to form the "Force" variants below.
    ForceOpaqueBlack = (OpaqueBlack | ForceBit),   ///< 0x11
    ForceOpaqueWhite = (OpaqueWhite | ForceBit),   ///< 0x12
};
/// Enumerates the scopes (i.e. registry locations) in which settings values are stored.
enum InternalSettingScope : uint32
{
    PrivateDriverKey  = 0x0,
    PublicPalKey      = 0x1,
    PrivatePalKey     = 0x2,
    // The numbering of the remaining scopes depends on the client interface version: versions older than 888 also
    // declare PrivatePalGfx6Key and place PublicCatalystKey at 0x5 instead of 0x3.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
    PrivatePalGfx6Key = 0x3,
    PublicCatalystKey = 0x5,
#else
    PublicCatalystKey = 0x3,
#endif
    PrivatePalGfx9Key = 0x4,
};
/// Override states available for feature settings.
enum class FeatureOverride : uint32
{
    Default  = 0,  ///< Default setting state.
    Enabled  = 1,  ///< (Force) enabled state.  Default may change itself to this state.
    Disabled = 2   ///< (Force) disabled state.  Default may change itself to this state.
};
/// Bitmask of driver feature settings that are controlled externally (e.g. by Radeon Settings/KMD).
/// Note that no enumerator uses bit 3.
enum RsFeatureType : uint32
{
    RsFeatureTypeTurboSync = (1u << 0),
    RsFeatureTypeChill     = (1u << 1),
    RsFeatureTypeDelag     = (1u << 2),
    RsFeatureTypeBoost     = (1u << 4),
    RsFeatureTypeProVsr    = (1u << 5),
};
/// Output union holding information about the one (singular) RsFeatureType that was requested.
union RsFeatureInfo
{
    /// Global settings for TurboSync.
    struct
    {
        bool   enabled;    ///< Whether TurboSync is enabled globally.
    } turboSync;

    /// Global settings for Chill.
    struct
    {
        bool   enabled;    ///< Whether Chill is enabled globally.
        uint32 hotkey;     ///< Virtual key code assigned to Chill, if nonzero.
        uint32 minFps;     ///< The global Chill minimum FPS limit.
        uint32 maxFps;     ///< The global Chill maximum FPS limit.
    } chill;

    /// Global settings for Delag.
    struct
    {
        bool   enabled;    ///< Whether Delag is enabled globally.
        uint32 hotkey;     ///< Virtual key code assigned to Delag, if nonzero.
        uint32 hotkeyInd;  ///< Virtual key code assigned to Delag's indicator, if nonzero.
        uint32 limitFps;   ///< The global Delag FPS limit.
        uint32 level;      ///< The global Delag level.
    } delag;

    /// Global settings for Boost.
    struct
    {
        bool   enabled;    ///< Whether Boost is enabled globally.
        uint32 hotkey;     ///< Virtual key code assigned to Boost, if nonzero.
        uint32 hotkeyInd;  ///< Virtual key code assigned to Boost's indicator, if nonzero.
        uint32 minRes;     ///< The global Boost minimum resolution.
    } boost;

    /// Global settings for ProVsr.
    struct
    {
        bool   enabled;    ///< Whether ProVsr is enabled globally.
        uint32 hotkey;     ///< Virtual key code assigned to ProVsr, if nonzero.
    } proVsr;
};
/// High-dynamic range (HDR) surface display modes.  Each value names the HDR display standard that applies to a
/// particular combination of swap chain texture format and screen colorspace/transfer function.
enum class HdrDisplayMode : uint32
{
    Sdr       = 0,  ///< Standard dynamic range; non-HDR compatible (default).
    Hdr10     = 1,  ///< HDR10 PQ.  Requires a 10:10:10:2 swap chain.
    ScRgb     = 2,  ///< scRGB HDR (Microsoft and FreeSync2 linear mode).  1.0 = 80 nits, 125.0 = 10000 nits.
                    ///  Requires an FP16 swap chain.
    FreeSync2 = 3,  ///< FreeSync2 HDR10 Gamma 2.2.  Requires a 10:10:10:2 swap chain.
};
/// Length, in chars, of the renderedByString and miscellaneousDebugString arrays in PalPublicSettings.
static constexpr uint32 MaxMiscStrLen = 61;
/// Selects whether fast clears on depth stencil views are performed with graphics or with compute.
enum class FastDepthStencilClearMode : uint8
{
    Default,   ///< The driver chooses compute or graphics at its discretion.
    Graphics,  ///< Always use graphics.
    Compute    ///< Always use compute.
};
/// Deferred batch binning modes.  The numbering depends on the client interface version: versions older than 875
/// also declare DeferredBatchBinDisabled as 0, which shifts the remaining values up by one.
enum DeferredBatchBinMode : uint32
{
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 875
    DeferredBatchBinDisabled = 0,
    DeferredBatchBinCustom   = 1,
    DeferredBatchBinAccurate = 2
#else
    DeferredBatchBinCustom   = 0,
    DeferredBatchBinAccurate = 1
#endif
};
/// PWS enable mode: disabled, fully enabled or partially enabled.
enum class PwsMode : uint32
{
    Disabled           = 0,  ///< The PWS feature is disabled.
    Enabled            = 1,  ///< The PWS feature is fully enabled if HW supports it.
    NoLateAcquirePoint = 2   ///< The PWS feature is enabled with the PWS counter only if HW supports it; no late
                             ///  acquire points.
};
#if defined(__unix__)
/// Enumerates the supported VM-always-valid modes.
enum VmAlwaysValidEnable : uint32
{
    VmAlwaysValidDefaultEnable = 0,  ///< Enable the optimization only when the kernel driver can support it.
    VmAlwaysValidForceDisable  = 1,  ///< Force the optimization to always be disabled.
    VmAlwaysValidForceEnable   = 2   ///< Force the optimization to always be enabled.
};
#endif
/// Alignment mode for accesses to graphics/compute buffers.
/// This affects only the alignment at which accesses must be made, not the alignment of the buffer as a whole.
enum class BufferAlignmentMode : uint8
{
    Default,   ///< KMD (and therefore PAL) picks the alignment requirement.  Clients should be prepared for anything.
    Dword,     ///< Hardware automatically aligns requests to the smaller of: element-size or DWORD.
    Unaligned  ///< Any request alignment is allowed.
};
/// Pal settings that are client visible and editable.
struct PalPublicSettings
{
/// Maximum border color palette size supported by any queue.
uint32 borderColorPaletteSizeLimit;
/// Whether to use graphics or compute for performing fast clears on depth stencil views.
FastDepthStencilClearMode fastDepthStencilClearMode;
/// Forces all serialized loads (LoadPipeline or LoadCompoundState) to fail.
bool forceLoadObjectFailure;
/// Controls the distribution mode for tessellation, which affects how patches are processed by different VGT
/// units. 0: None - No distribution across VGTs (legacy mode). 1: Default - Optimal settings are chosen depending
/// on the gfxip. 2: Patch - Individual patches are distributed to different VGTs. 3: Donut - Patches are split
/// into donuts and distributed to different VGTs. 4: Trapezoid - Patches from donuts are split into trapezoids and
/// distributed to different VGTs. Falls back to donut mode if HW does not support this mode. 5: Trapezoid only -
/// Distribution turned off if HW does not support this mode.
uint32 distributionTessMode;
/// Flags that control PAL optimizations to reduce context rolls. 0: Optimization disabled. 1: Pad parameter cache
/// space. Sets VS export count and PS interpolant number to per-command buffer maximum value. Reduces context rolls
/// at the expense of parameter cache space.
uint32 contextRollOptimizationFlags;
/// The number of unbound descriptor debug srds to allocate. To detect reads of unbound descriptor within arrays,
/// multiple debug srds can be allocated.
uint32 unboundDescriptorDebugSrdCount;
/// Disables compilation of internal PAL shaders. It can be enabled only if a PAL client won't use any of PAL blit
/// functionalities on gfx/compute engines.
bool disableResourceProcessingManager;
/// Controls app detect and image quality altering optimizations exposed by CCC.
uint32 catalystAI;
/// Controls texture filtering optimizations exposed by CCC.
uint32 textureOptLevel;
/// Disables SC initialization. It can be enabled only if a PAL client won't use SC for shader compilation and
/// provide direct ISA binaries(usually AQL path).
bool disableScManager;
/// Information about the client performing the rendering. For example: Rendered By PAL (0.0.1)
char renderedByString[MaxMiscStrLen];
/// Debug information that the client or tester might want reported.
char miscellaneousDebugString[MaxMiscStrLen];
/// Allows SC to make optimizations at the expense of IEEE compliance.
bool allowNonIeeeOperations;
/// Controls whether shaders should execute one atomic instruction per wave for UAV append/consume operations.
/// If false, one atomic will be executed per thread.
bool appendBufPerWaveAtomic;
/// Bitmask of cases where texture compatible meta data will be used Single-sample color surface: 0x00000001 MSAA
/// color surface: 0x00000002 FMask data: 0x00000004 Single-sample depth surface: 0x00000008 MSAA depth surface:
/// 0x00000010 Allow stencil: 0x00000020 Allow Z-16 surfs 0x00000040
uint32 tcCompatibleMetaData;
/// Specifies the threshold below which CmdCopyMemory() is executed via a CpDma BLT, in bytes. CPDMA copies have
/// lower overhead than CS/Gfx copies, but less throughput for large copies.
uint32 cpDmaCmdCopyMemoryMaxBytes;
/// Forces high performance state for allocated queues. Note: currently supported in Windows only.
bool forceHighClocks;
/// When submitting multiple command buffers in a single grQueueSubmit call, the ICD will patch the command streams
/// so that the command buffers are chained together instead of submitting through KMD multiple times. This setting
/// limits the number of command buffers that will be chained together; reduce to prevent problems due to long
/// running submits.
uint32 cmdBufBatchedSubmitChainLimit;
/// Flags that control PAL's command allocator residency optimizations. If a command allocation isn't optimized PAL
/// will wait for it to become resident at creation. 0x1 - Wait for command data to become resident at Submit-time.
/// 0x2 - Wait for embedded data to become resident at Submit-time. 0x4 - Wait for marker data to become resident at
/// Submit-time.
uint32 cmdAllocResidency;
/// Overrides max queued frames allowed
uint32 maxQueuedFrames;
/// Recommended maximum number of presentable images per adapter (including LDA chain). If the app exceeds the
/// presentable image number threshold, a warning may be reported.
uint32 presentableImageNumberThreshold;
/// Provides a hint to PAL that the client knows every individual depth stencil surface is always cleared with the
/// same values. If TRUE, per-tile tracking of exp/clear will be enabled (requires HTile).
bool hintInvariantDepthStencilClearValues;
/// Provides a hint to PAL that PAL should disable color compression on surfaces that are smaller than or equal to
/// this setting (setting * setting) in size.
uint32 hintDisableSmallSurfColorCompressionSize;
/// Disables Escape call to KMD. This is a temporary setting for experimentation that is expected to break features
/// that currently needs Escape call.
bool disableEscapeCall;
/// A hint to the Windows OS that this application has submissions that are expected to run for a long time. This
/// tells the OS that checking elapsed execution time is not a good way to judge if the GPU is hung.
///
/// If this flag is set, Windows shouldn't TDR long submissions on uncontested hardware queues. Submissions on
/// contested queues must preempt within the time limit to avoid a TDR. Setting this may also extend the TDR timer
/// on compute queues.
bool longRunningSubmissions;
/// Disables MCBP on demand. This is a temporary setting until ATOMIC_MEM packet issue with MCBP is resolved.
bool disableCommandBufferPreemption;
/// Disable the fast clear eliminate skipping optimization. This optimization will conservatively track the usage
/// of clear values to allow the vast majority of images that never clear to a value that isn't TC-compatible to
/// skip the CPU and front-end GPU overhead of issuing a predicated fast clear eliminate BLT.
bool disableSkipFceOptimization;
/// Sets the minimum BPP of surfaces which will have DCC enabled
uint32 dccBitsPerPixelThreshold;
/// See largePageSizeInBytes in DeviceProperties. This limit defines how large an allocation must be to have
/// PAL automatically pad allocation starting virtual address alignments to enable this optimization. By
/// default, PAL will use the KMD-reported limit.
gpusize largePageMinSizeForVaAlignmentInBytes;
/// See largePageSizeInBytes in DeviceProperties. This limit defines how large an allocation must be to have
/// PAL automatically pad allocation sizes to fill an integral number of large pages. By default, PAL will
/// use the KMD-reported limit.
gpusize largePageMinSizeForSizeAlignmentInBytes;
/// Makes the unbound descriptor debug srd 0 so the hardware drops the load and ignores it instead of pagefaulting.
/// Used to workaround incorrect app behavior.
bool zeroUnboundDescDebugSrd;
/// Preferred heap for uploading client pipelines. Default is set to @ref GpuHeap::GpuHeapInvisible. Setting is
/// ignored for internal pipelines and are uploaded to @ref GpuHeap::GpuHeapLocal.
GpuHeap pipelinePreferredHeap;
bool depthClampBasedOnZExport;
/// Force the PreColorTarget to an earlier PreRasterization point if used as a wait point. This is to prevent a
/// write-after-read hazard for a corner case: shader exports from distinct packers are not ordered. Advancing
/// wait point from PreColorTarget to PostPrefetch could cause over-sync due to extra VS/PS_PARTIAL_FLUSH
/// inserted. It is default to false, but client drivers may choose to app-detect to enable if see corruption.
bool forceWaitPointPreColorToPostPrefetch;
/// Allows the client to disable debug overlay visual confirm after DebugOverlay::Platform is created when the
/// panel setting DebugOverlayEnabled is globally set but a certain application might need to turn off visual
/// confirm to make the screen not too noisy.
bool disableDebugOverlayVisualConfirm;
bool enableExecuteIndirectPacket;
/// Offers flexibility to the client to choose Graphics vs Compute engine for Indirect Command Generation
/// (Shader path) based on performance and other factors. The default is false since we have seen perf gains using
/// the ACE.
bool disableExecuteIndirectAceOffload;
/// Value to initialize metadata for DCC surfaces to, if they are compressable. This has no effect on non-DCC
/// images. Images whose initial layout is not compressable are only affected if this is "forced".
/// 0x00 - Uncompressed (default)
/// 0x01 - Opaque Black
/// 0x02 - Opaque White
/// 0x11 - Forced Opaque Black
/// 0x12 - Forced Opaque White
uint32 dccInitialClearKind;
/// Allows the client to not create internal VrsImage. Pal internal will create a 16M image as vrsImageSize.
bool disableInternalVrsImage;
/// Allows the client to control binning persistent and context states per bin.
/// A value of 0 tells PAL to pick the number of states per bin.
uint32 binningPersistentStatesPerBin;
uint32 binningContextStatesPerBin;
/// This key controls if binning will be disabled when the PS may kill pixels.
OverrideMode disableBinningPsKill;
/// Controls GS LateAlloc val (for pos/prim allocations NOT param cache) on NGG pipelines. Can be no more than 127.
uint32 nggLateAllocGs;
/// Bitmask of cases where RPM view memory accesses will bypass the MALL
/// RpmViewsBypassMallOff (0x0): Disable MALL bypass
/// RpmViewsBypassMallOnRead (0x1): Skip MALL for read access of views created in RPM
/// RpmViewsBypassMallOnWrite (0x2): Skip MALL for write access of views created in RPM
/// RpmViewsBypassMallOnCbDbWrite (0x4): Control the RPM CB/DB behavior
RpmViewsBypassMall rpmViewsBypassMall;
/// Optimize color export format for depth only rendering. Only applicable for RB+ parts
bool optDepthOnlyExportRate;
/// Controls whether or not we should expand Hi-Z to full range rather than doing fine-grain resummarize
/// operations. Expanding Hi-Z leaves the Hi-Z data in a less optimal state but is a much faster operation
/// than the fine-grain resummarize.
bool expandHiZRangeForResummarize;
/// Control whether to have command buffer emit SQTT marker events. Useful for client driver to perform SQTT
/// dump without the involvement of dev driver.
bool enableSqttMarkerEvent;
/// Controls the value of CB_COLOR0_ATTRIB.LIMIT_COLOR_FETCH_TO_256B_MAX. This bit limits CB fetch to 256B on cache
/// miss, regardless of sector size.
bool limitCbFetch256B;
/// Controls whether or not deferred batch binning is enabled.
/// 0 : Batch binning always disabled
/// 1 : Use custom bin sizes
/// 2 : Optimal
DeferredBatchBinMode binningMode;
/// Controls the custom batch bin size. Only used when binningMode == 1 (custom bin sizes). High word is for x, low
/// word is for y. Default is 128x128. Values must be a power of two between 16 and 512.
uint32 customBatchBinSize;
/// Maximum number of primitives per batch. The maximum value is 1024.
uint32 binningMaxPrimPerBatch;
/// Controls PWS enable mode: disabled, fully enabled or partially enabled. Only take effect if HW supports PWS.
PwsMode pwsMode;
/// Controls the MaxScratchRingSizeBaseline, which is really just the maximum size of the scratch ring
gpusize maxScratchRingSizeBaseline;
/// Controls the maximum size of the scratch ring allocation
uint32 maxScratchRingSizeScalePct;
#if defined(__unix__)
/// Whether to enable the vm-always-valid feature on Linux when allocating a Bo
VmAlwaysValidEnable enableVmAlwaysValid;
#endif
/// Alignment mode for graphics and compute buffers.
/// Clients must check Pal::DeviceProperties::osProperties::flags::forceAlignmentSupported
/// to see if anything other than default will work.
BufferAlignmentMode hardwareBufferAlignmentMode;
/// Disallows putting the shader ring in system memory for performance purposes.
/// This is done by un-listing GpuHeapGartUswc as a possible heap for the shader rings,
/// instead only allowing GpuHeapInvisible and GpuHeapLocal.
bool forceShaderRingToVMem;
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 818
/// If the client sets this to true they promise they've done exhaustive testing on every ASIC to prove that this
/// application can use AC01 fast clears safely. This should never be forced to true unconditionally.
bool ac01WaNotNeeded;
#endif
/// Toggles whether or not image copies will prefer using the graphics pipeline. This setting does not force all
/// copies to use graphics or compute, it changes what method will be selected in cases where either could be used.
bool preferGraphicsImageCopy;
/// Bitmask to control adding waits around Flush events.
/// This is a public setting that allows AppDetect to override the 'WaitOnFlush' setting.
/// Setting 'waitOnFlush' for actual workarounds is strongly discouraged because:
/// 1) It has a negative performance impact.
/// 2) Waits affect the timing and pipeline execution, which can hide underlying hw/fw/sw bugs.
/// Issues resolved by added waits should be root caused.
uint32 waitOnFlush;
};
/// Selects what the GPU Profiling layer does once its buffer has filled up.
enum GpuProfilerStallMode : uint32
{
    /// Always stall to get accurate trace data.
    GpuProfilerStallAlways     = 0,

    /// Lose register-level detail if under pressure to avoid stalls.
    GpuProfilerStallLoseDetail = 1,

    /// Never stall, miss trace packets.
    GpuProfilerStallNever      = 2,
};
/// Holds the equations required to interpret the raw memory of a tiled texture.
struct SwizzleEquation
{
    /// Address setting: each bit is the result of addr ^ xor1 ^ xor2.
    SwizzleEquationBit addr[SwizzleEquationMaxBits];

    /// First xor setting.
    SwizzleEquationBit xor1[SwizzleEquationMaxBits];

    /// Second xor setting.
    SwizzleEquationBit xor2[SwizzleEquationMaxBits];

    /// The number of bits in the equation.
    uint32             numBits;

    /// True if depth slices are treated as being stacked vertically prior to swizzling.
    bool               stackedDepthSlices;
};
/// Bit flags naming the hardware features supported for PRT (sparse images). Each flag occupies a single bit.
enum PrtFeatureFlags : uint32
{
    /// Indicates support for sparse buffers.
    PrtFeatureBuffer             = (1u << 0),

    /// Indicates support for sparse 2D images.
    PrtFeatureImage2D            = (1u << 1),

    /// Indicates support for sparse 3D images.
    PrtFeatureImage3D            = (1u << 2),

    /// Indicates support for sparse multisampled images.
    PrtFeatureImageMultisampled  = (1u << 3),

    /// Indicates support for sparse depth/stencil images.
    PrtFeatureImageDepthStencil  = (1u << 4),

    /// Indicates support for residency status in shader instructions.
    PrtFeatureShaderStatus       = (1u << 5),

    /// Indicates support for LOD clamping in shader instructions.
    PrtFeatureShaderLodClamp     = (1u << 6),

    /// Indicates support for non-miptail levels with dimensions that aren't integer multiples of the tile size as
    /// long as they are at least as large as a single tile.
    PrtFeatureUnalignedMipSize   = (1u << 7),

    /// Indicates support for per-slice miptail (slice-major order).
    PrtFeaturePerSliceMipTail    = (1u << 8),

    /// Indicates support for aliasing tiles (without metadata).
    PrtFeatureTileAliasing       = (1u << 9),

    /// Indicates whether reads of unmapped tiles always return zero.
    PrtFeatureStrictNull         = (1u << 10),

    /// Indicates support for sparse 3D images restricted to non-standard tile shapes that match the tile mode
    /// block depth.
    PrtFeatureNonStandardImage3D = (1u << 11),

    /// Indicates support for the use of residency maps.
    PrtFeaturePrtPlus            = (1u << 12),
};
/// Describes the scope of a setting that is accessible by clients.
enum class SettingScope
{
    /// For settings specific to a UMD.
    Driver,

    /// For global settings controlled by CCC.
    Global,
};
/// Big Software (BigSW) Release information structure.
///
/// Software release management uses this version number to control a rollout of big SW features together.
struct BigSoftwareReleaseInfo
{
    /// BigSW Release major version.
    uint32 majorVersion;

    /// BigSW Release minor version.
    uint32 minorVersion;

    /// BigSW Release miscellaneous control.
    uint32 miscControl;
};
/// Virtual display capabilities as determined by the OS.
///
/// The reported values bound the valid ranges of values supported by the @ref VirtualDisplayInfo structure passed in
/// to @ref IDevice::CreateVirtualDisplay.
struct VirtualDisplayCapabilities
{
    /// The maximum number of virtual displays supported.
    uint32   maxVirtualDisplays;

    /// The minimum refresh rate.
    Rational minRefreshRate;

    /// The maximum refresh rate.
    Rational maxRefreshRate;
};
/// Properties reported for one specific virtual display.
struct VirtualDisplayProperties
{
    /// True if it's a virtual display.
    bool isVirtualDisplay;
};
/// Enumerates all of the types of local video memory which could be associated with a GPU.
///
/// The numeric values are spelled out explicitly; they match the values the enumerators receive from their
/// declaration order. Note that Ddr5 is listed after Lpddr5.
enum class LocalMemoryType : uint32
{
    Unknown = 0,
    Ddr2    = 1,
    Ddr3    = 2,
    Ddr4    = 3,
    Gddr5   = 4,
    Gddr6   = 5,
    Hbm     = 6,
    Hbm2    = 7,
    Hbm3    = 8,
    Lpddr4  = 9,
    Lpddr5  = 10,
    Ddr5    = 11,
    Count   = 12  ///< Number of enumerators listed above.
};
/// Bitmask of all MSAA/EQAA types supported. Each flag is named MsaaS<n>F<m>, where S<n> is the number of samples
/// and F<m> is the number of shaded fragments. The MsaaAllF<m> masks combine every flag with <m> fragments.
enum MsaaFlags : uint16
{
    // One shaded fragment.
    MsaaS1F1  = 0x0001,
    MsaaS2F1  = 0x0002,
    MsaaS4F1  = 0x0004,
    MsaaS8F1  = 0x0008,
    MsaaS16F1 = 0x0010,
    MsaaAllF1 = (MsaaS1F1 | MsaaS2F1 | MsaaS4F1 | MsaaS8F1 | MsaaS16F1),

    // Two shaded fragments.
    MsaaS2F2  = 0x0020,
    MsaaS4F2  = 0x0040,
    MsaaS8F2  = 0x0080,
    MsaaS16F2 = 0x0100,
    MsaaAllF2 = (MsaaS2F2 | MsaaS4F2 | MsaaS8F2 | MsaaS16F2),

    // Four shaded fragments.
    MsaaS4F4  = 0x0200,
    MsaaS8F4  = 0x0400,
    MsaaS16F4 = 0x0800,
    MsaaAllF4 = (MsaaS4F4 | MsaaS8F4 | MsaaS16F4),

    // Eight shaded fragments.
    MsaaS8F8  = 0x1000,
    MsaaS16F8 = 0x2000,
    MsaaAllF8 = (MsaaS8F8 | MsaaS16F8),

    // Every flag defined above.
    MsaaAll   = (MsaaAllF1 | MsaaAllF2 | MsaaAllF4 | MsaaAllF8),
};
/// Enumeration of the supported RTIP (ray tracing IP) versions.
enum class RayTracingIpLevel : uint32
{
    /// The device does not have a RayTracing Ip Level.
    _None   = 0x0,

#ifndef None
    /// Alias of _None. Only declared when no preprocessor macro called None is defined.
    None    = _None,
#endif

    /// First Implementation of HW RT.
    RtIp1_0 = 0x1,

    /// Added computation of triangle barycentrics into HW.
    RtIp1_1 = 0x2,

    /// Added more Hardware RayTracing features, such as BoxSort, PointerFlag, etc.
    RtIp2_0 = 0x3,
};
/// Which IP version a component has reported, stored as a major.minor.stepping triple in 32 bits.
struct IpTriple
{
    uint32 major    :  8; ///< Major revision value
    uint32 minor    :  8; ///< Minor revision value
    uint32 stepping : 16; ///< Stepping value

    /// Packs the triple into a single 32-bit value: major in bits [31:24], minor in bits [23:16] and stepping in
    /// bits [15:0].
    ///
    /// Each bit-field is cast to uint32 before it is shifted. Bit-fields narrower than int are promoted to signed
    /// int, so shifting a major value of 128 or more left by 24 would otherwise shift into the sign bit.
    constexpr operator uint32() const
    {
        return ( static_cast<uint32>(stepping)      |
                (static_cast<uint32>(minor) << 16)  |
                (static_cast<uint32>(major) << 24));
    }
};
/// Reports various properties of a particular IDevice to the client. @see IDevice::GetProperties.
struct DeviceProperties
{
uint32 vendorId; ///< Vendor ID (should always be 0x1002 for AMD).
uint32 deviceId; ///< GPU device ID (e.g., Hawaii XT = 0x67B0).
uint32 revisionId; ///< GPU revision. HW-specific value differentiating between different
/// SKUs or revisions. Corresponds to one of the PRID_* revision IDs.
uint32 eRevId; ///< GPU emulation/internal revision ID.
AsicRevision revision; ///< ASIC revision.
GpuType gpuType; ///< Type of GPU (discrete vs. integrated)
uint16 gpuPerformanceCapacity; ///< Portion of GPU assigned in virtualized system (SRIOV)
///< 0-65535, 0 invalid (not virtualized), 1 min, 65535 max
GfxIpLevel gfxLevel; ///< IP level of this GPU's GFX block
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
OssIpLevel ossLevel; ///< IP level of this GPU's OSS block
VceIpLevel vceLevel; ///< IP level of this GPU's VCE block
UvdIpLevel uvdLevel; ///< IP level of this GPU's UVD block
#endif
VcnIpLevel vcnLevel; ///< IP level of this GPU's VCN block
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
SpuIpLevel spuLevel; ///< IP level of this GPU's SPU block
#endif
PspIpLevel pspLevel; ///< IP level of this GPU's PSP block
uint32 gfxStepping; ///< Stepping level of this GPU's GFX block
IpTriple gfxTriple; ///< Full GFX IP level (major.minor.step) of this GPU
char gpuName[MaxDeviceName]; ///< Null terminated string identifying the GPU.
uint32 gpuIndex; ///< Device's index in a linked adapter chain.
uint32 maxGpuMemoryRefsResident; ///< Maximum number of GPU memory references that can be resident
/// at any time. Memory references set both via IQueue and IDevice
/// (via AddGpuMemoryReferences() or Submit()) count against this limit.
uint64 timestampFrequency; ///< Frequency of the device's timestamp counter in Hz.
/// @see ICmdBuffer::CmdWriteTimestamp.
uint32 attachedScreenCount; ///< Number of screens attached to the device.
uint32 maxSemaphoreCount; ///< Queue semaphores cannot have a signal count higher than this value.
/// For example, one indicates that queue semaphores are binary.
PalPublicSettings settings; ///< Public settings that the client has the option of overriding
#if PAL_CLIENT_EXAMPLE
AddrHandle hAddrlib; ///< Handle to addrlib for directed image tests.
#endif
struct
{
union
{
struct
{
/// This engine supports timestamps (ICmdBuffer::CmdWriteTimestamp()).
uint32 supportsTimestamps : 1;
/// This engine supports ICmdBuffer::CmdSetPredication() based on Streamout/Occlusion query
uint32 supportsQueryPredication : 1;
/// This engine supports ICmdBuffer::CmdSetPredication() based on a 32-bit GPU memory allocation
uint32 supports32bitMemoryPredication : 1;
/// This engine supports ICmdBuffer::CmdSetPredication() based on a 64-bit GPU memory allocation
uint32 supports64bitMemoryPredication : 1;
/// This engine supports ICmdBuffer::If(), Else() and EndIf() calls.
uint32 supportsConditionalExecution : 1;
/// This engine supports ICmdBuffer::While() and EndWhile() calls.
uint32 supportsLoopExecution : 1;
/// This engine supports ICmdBuffer::CmdWaitRegisterValue(), WaitMemoryValue() and
/// CopyRegisterToMemory() calls.