Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/DXIL.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3329,6 +3329,7 @@ SM.CSNOSIGNATURES Compute shaders mu
SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain.
SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified.
SM.DXILVERSION Target shader model requires specific Dxil Version
SM.EXPLICITTGSMSIZEONENTRY Total Thread Group Shared Memory used by entry must not exceed limit specified by entry attribute.
SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified.
SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified.
SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3.
Expand All @@ -3351,8 +3352,7 @@ SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler fe
SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs.
SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain.
SM.ISSPECIALFLOAT 16 bit IsSpecialFloat overloads require Shader Model 6.9 or higher.
SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1.
SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1.
SM.MAXTGSMSIZEONENTRY Total Thread Group Shared Memory used by entry must not exceed maximum for shader model.
SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.
SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows.
SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1.
Expand Down
6 changes: 4 additions & 2 deletions include/dxc/DXIL/DxilFunctionProps.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ struct DxilWaveSize {
};

struct DxilFunctionProps {
static constexpr int kGroupSharedLimitUnset = -1;

DxilFunctionProps() {
memset(&ShaderProps, 0, sizeof(ShaderProps));
shaderKind = DXIL::ShaderKind::Invalid;
Expand All @@ -117,7 +119,7 @@ struct DxilFunctionProps {
memset(&Node, 0, sizeof(Node));
Node.LaunchType = DXIL::NodeLaunchType::Invalid;
Node.LocalRootArgumentsTableIndex = -1;
groupSharedLimitBytes = 0;
groupSharedLimitBytes = kGroupSharedLimitUnset;
}
union {
// Geometry shader.
Expand Down Expand Up @@ -175,7 +177,7 @@ struct DxilFunctionProps {
// numThreads shared between multiple shader types and node shaders.
unsigned numThreads[3];

unsigned groupSharedLimitBytes;
int groupSharedLimitBytes;

struct NodeProps {
DXIL::NodeLaunchType LaunchType = DXIL::NodeLaunchType::Invalid;
Expand Down
1 change: 0 additions & 1 deletion include/dxc/DXIL/DxilModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,6 @@ class DxilModule {
void SetNumThreads(unsigned x, unsigned y, unsigned z);
unsigned GetNumThreads(unsigned idx) const;

unsigned GetGroupSharedLimit() const;
// The total amount of group shared memory (in bytes) used by the shader.
unsigned GetTGSMSizeInBytes() const;

Expand Down
34 changes: 32 additions & 2 deletions lib/DXIL/DxilMetadataHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1626,7 +1626,9 @@ MDTuple *DxilMDHelper::EmitDxilEntryProperties(uint64_t rawShaderFlag,
}

const hlsl::ShaderModel *SM = GetShaderModel();
if (SM->IsSMAtLeast(6, 10)) {
if (SM->IsSMAtLeast(6, 10) &&
props.groupSharedLimitBytes !=
DxilFunctionProps::kGroupSharedLimitUnset) {
MDVals.emplace_back(
Uint32ToConstMD(DxilMDHelper::kDxilGroupSharedLimitTag));
MDVals.emplace_back(Uint32ToConstMD(props.groupSharedLimitBytes));
Expand Down Expand Up @@ -1695,13 +1697,31 @@ MDTuple *DxilMDHelper::EmitDxilEntryProperties(uint64_t rawShaderFlag,
MS.maxPrimitiveCount, MS.outputTopology,
MS.payloadSizeInBytes);
MDVals.emplace_back(pMDTuple);

const hlsl::ShaderModel *SM = GetShaderModel();
if (SM->IsSMAtLeast(6, 10) &&
props.groupSharedLimitBytes !=
DxilFunctionProps::kGroupSharedLimitUnset) {
MDVals.emplace_back(
Uint32ToConstMD(DxilMDHelper::kDxilGroupSharedLimitTag));
MDVals.emplace_back(Uint32ToConstMD(props.groupSharedLimitBytes));
}
} break;
case DXIL::ShaderKind::Amplification: {
auto &AS = props.ShaderProps.AS;
MDVals.emplace_back(Uint32ToConstMD(DxilMDHelper::kDxilASStateTag));
MDTuple *pMDTuple =
EmitDxilASState(props.numThreads, AS.payloadSizeInBytes);
MDVals.emplace_back(pMDTuple);

const hlsl::ShaderModel *SM = GetShaderModel();
if (SM->IsSMAtLeast(6, 10) &&
props.groupSharedLimitBytes !=
DxilFunctionProps::kGroupSharedLimitUnset) {
MDVals.emplace_back(
Uint32ToConstMD(DxilMDHelper::kDxilGroupSharedLimitTag));
MDVals.emplace_back(Uint32ToConstMD(props.groupSharedLimitBytes));
}
} break;
case DXIL::ShaderKind::Node: {
// The Node specific properties have already been handled by
Expand All @@ -1714,6 +1734,15 @@ MDTuple *DxilMDHelper::EmitDxilEntryProperties(uint64_t rawShaderFlag,
NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[1]));
NumThreadVals.emplace_back(Uint32ToConstMD(props.numThreads[2]));
MDVals.emplace_back(MDNode::get(m_Ctx, NumThreadVals));

const hlsl::ShaderModel *SM = GetShaderModel();
if (SM->IsSMAtLeast(6, 10) &&
props.groupSharedLimitBytes !=
DxilFunctionProps::kGroupSharedLimitUnset) {
MDVals.emplace_back(
Uint32ToConstMD(DxilMDHelper::kDxilGroupSharedLimitTag));
MDVals.emplace_back(Uint32ToConstMD(props.groupSharedLimitBytes));
}
} break;
default:
break;
Expand Down Expand Up @@ -1781,7 +1810,8 @@ void DxilMDHelper::LoadDxilEntryProperties(const MDOperand &MDO,
} break;

case DxilMDHelper::kDxilGroupSharedLimitTag: {
DXASSERT(props.IsCS(), "else invalid shader kind");
DXASSERT(props.IsCS() || props.IsMS() || props.IsAS() || props.IsNode(),
"else invalid shader kind");
props.groupSharedLimitBytes = ConstMDToUint32(MDO);
if (!m_pSM->IsSMAtLeast(6, 10))
m_bExtraMetadata = true;
Expand Down
15 changes: 6 additions & 9 deletions lib/DXIL/DxilModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,12 @@ void DxilModule::SetEntryFunction(Function *pEntryFunc) {
// Move entry props to new function in order to preserve them.
std::unique_ptr<DxilEntryProps> Props =
std::move(m_DxilEntryPropsMap.begin()->second);
// For HS, make sure we add the patch constant function to the set of patch
// constant functions.
m_PatchConstantFunctions.clear();
if (Props->props.IsHS() && Props->props.ShaderProps.HS.patchConstantFunc)
m_PatchConstantFunctions.insert(
Props->props.ShaderProps.HS.patchConstantFunc);
m_DxilEntryPropsMap.clear();
m_DxilEntryPropsMap[m_pEntryFunc] = std::move(Props);
}
Expand Down Expand Up @@ -412,15 +418,6 @@ unsigned DxilModule::GetNumThreads(unsigned idx) const {
return props.numThreads[idx];
}

/// Returns the groupSharedLimitBytes value stored in the function props of
/// the module's only entry.
///
/// Asserts that the module holds exactly one entry-props record, that the
/// shader model is a CS, MS or AS profile, and that the shader model kind
/// matches the kind recorded in those props.
unsigned DxilModule::GetGroupSharedLimit() const {
  DXASSERT(m_DxilEntryPropsMap.size() == 1 &&
               (m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()),
           "only works for CS/MS/AS profiles");
  // The single map entry owns the props for the entry function.
  const auto &EntryProps = m_DxilEntryPropsMap.begin()->second;
  DXASSERT_NOMSG(m_pSM->GetKind() == EntryProps->props.shaderKind);
  return EntryProps->props.groupSharedLimitBytes;
}

unsigned DxilModule::GetTGSMSizeInBytes() const {
const DataLayout &DL = m_pModule->getDataLayout();
unsigned TGSMSize = 0;
Expand Down
186 changes: 136 additions & 50 deletions lib/DxilValidation/DxilValidation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3916,70 +3916,156 @@ static void ValidateGlobalVariables(ValidationContext &ValCtx) {
DxilModule &M = ValCtx.DxilMod;

const ShaderModel *pSM = ValCtx.DxilMod.GetShaderModel();
bool TGSMAllowed = pSM->IsCS() || pSM->IsAS() || pSM->IsMS() || pSM->IsLib();

unsigned TGSMSize = 0;
std::vector<StoreInst *> FixAddrTGSMList;
const DataLayout &DL = M.GetModule()->getDataLayout();
std::vector<StoreInst *> FixAddrTGSMList;

// True for the shader kinds treated here as groupshared-capable entries:
// compute, amplification, mesh and node.
auto isTGSMEntry = [](DXIL::ShaderKind Kind) -> bool {
  switch (Kind) {
  case DXIL::ShaderKind::Compute:
  case DXIL::ShaderKind::Amplification:
  case DXIL::ShaderKind::Mesh:
  case DXIL::ShaderKind::Node:
    return true;
  default:
    return false;
  }
};

// Resolves the groupshared byte limit that applies to an entry:
//  - a non-negative groupSharedLimitBytes on the props is used as-is;
//  - otherwise CS/AS/node entries get DXIL::kMaxTGSMSize and mesh entries
//    get DXIL::kMaxMSSMSize;
//  - any other kind gets 0.
auto getMaxTGSM = [](const DxilFunctionProps &Props) -> unsigned {
  const auto Limit = Props.groupSharedLimitBytes;
  if (Limit >= 0)
    return static_cast<unsigned>(Limit);

  // Keep the CS/AS/node test ahead of the mesh test so the result is the same
  // even if more than one predicate were to hold.
  if (Props.IsCS() || Props.IsAS() || Props.IsNode())
    return DXIL::kMaxTGSMSize;
  if (Props.IsMS())
    return DXIL::kMaxMSSMSize;
  return 0;
};

DenseMap<const Function *, uint32_t> TGSMInFunc;
// Initialize all function TGSM usage to zero
for (auto &function : M.GetModule()->getFunctionList())
TGSMInFunc[&function] = 0;

// Map TGSM overages per function, used for error reporting
// Tracks first user per GV that caused overage.
typedef MapVector<GlobalVariable *, Instruction *> FirstUserMap;
typedef DenseMap<const Function *, FirstUserMap> TGSMOverageMap;
TGSMOverageMap TGSMOverages;

auto ReportTGSMOverages = [&](Function *EntryFunc) {
unsigned Size = TGSMInFunc[EntryFunc];
if (!Size)
return; // No TGSM used.

// Several possibilities:
// - Entry point or library function with function properties
// - Patch constant function without function properties, TGSM not allowed
// - No-inline function without function properties, TGSM counted in entry
DXIL::ShaderKind Kind = DXIL::ShaderKind::Invalid;
bool IsPatchConstant = M.IsPatchConstantShader(EntryFunc);
if (M.HasDxilFunctionProps(EntryFunc))
Kind = M.GetDxilEntryProps(EntryFunc).props.shaderKind;
else if (!IsPatchConstant)
return; // no-inline function, accounted for in entry

auto Overages = TGSMOverages.find(EntryFunc);
if (Overages == TGSMOverages.end())
return;

unsigned MaxSize = 0;
ValidationRule Rule = ValidationRule::SmMaxTGSMSizeOnEntry;

// Props only exist if not a patch constant function.
if (!IsPatchConstant) {
DxilFunctionProps &Props = M.GetDxilFunctionProps(EntryFunc);
MaxSize = getMaxTGSM(Props);
Rule = Props.groupSharedLimitBytes !=
DxilFunctionProps::kGroupSharedLimitUnset
? ValidationRule::SmExplicitTGSMSizeOnEntry
: ValidationRule::SmMaxTGSMSizeOnEntry;
}

for (auto &GVAndUser : Overages->second) {
Instruction *UseInst = GVAndUser.second;
if (!isTGSMEntry(Kind))
ValCtx.EmitInstrFormatError(UseInst, ValidationRule::SmTGSMUnsupported,
{"from non-compute entry points"});
else
ValCtx.EmitInstrFormatError(UseInst, Rule,
{EntryFunc->getName(), std::to_string(Size),
std::to_string(MaxSize)});
}
};

// One pending item of the groupshared-usage walk: a user reached while
// following the use chain that starts at a TGSM global variable.
// Entries are built positionally ({U, FirstUser}), so member order matters.
struct WorkListEntry {
// The user still to be examined.
User *U;
// FirstUser tracks the first (inner-most) instruction user of the TGSM
// variable for this worklist entry.
// It is null while no instruction has been seen yet on this chain.
Instruction *FirstUser;
};

// Collect total groupshared memory potentially used by every function
for (GlobalVariable &GV : M.GetModule()->globals()) {
ValidateGlobalVariable(GV, ValCtx);
if (GV.getType()->getAddressSpace() == DXIL::kTGSMAddrSpace) {
if (!TGSMAllowed)
ValCtx.EmitGlobalVariableFormatError(
&GV, ValidationRule::SmTGSMUnsupported,
{std::string("in Shader Model ") + M.GetShaderModel()->GetName()});
// Lib targets need to check the usage to know if it's allowed
if (pSM->IsLib()) {
for (User *U : GV.users()) {
if (Instruction *I = dyn_cast<Instruction>(U)) {
llvm::Function *F = I->getParent()->getParent();
SmallPtrSet<llvm::Function *, 8> completeFuncs;
SmallVector<WorkListEntry, 16> WorkList;
auto AddUsers = [&WorkList](User *U, Instruction *FirstUser) {
for (User *U : U->users()) {
if (!FirstUser && isa<Instruction>(U))
WorkList.push_back({U, cast<Instruction>(U)});
else
WorkList.push_back({U, FirstUser});
}
};
uint32_t GVSize = DL.getTypeAllocSize(GV.getType()->getElementType());

AddUsers(&GV, nullptr);

while (!WorkList.empty()) {
WorkListEntry Info = WorkList.pop_back_val();
// If const, keep going until we find something we can use
if (isa<Constant>(Info.U)) {
AddUsers(Info.U, Info.FirstUser);
continue;
}

if (Instruction *I = dyn_cast<Instruction>(Info.U)) {
llvm::Function *F = I->getParent()->getParent();
if (completeFuncs.insert(F).second) {
// If function is new, process it and its users
// Add users to the worklist
Instruction *FirstUser = Info.FirstUser ? Info.FirstUser : I;
AddUsers(F, FirstUser);
// Add groupshared size to function's total
unsigned &TotalSize = TGSMInFunc[F];
TotalSize += GVSize;
// If this is an entry function, check the TotalSize against the
// limits.
if (M.HasDxilEntryProps(F)) {
DxilFunctionProps &Props = M.GetDxilEntryProps(F).props;
if (!Props.IsCS() && !Props.IsAS() && !Props.IsMS() &&
!Props.IsNode()) {
ValCtx.EmitInstrFormatError(I,
ValidationRule::SmTGSMUnsupported,
{"from non-compute entry points"});
}
const DxilFunctionProps &Props = M.GetDxilEntryProps(F).props;
unsigned MaxSize = getMaxTGSM(Props);
if (TotalSize > MaxSize && TGSMOverages[F].count(&GV) == 0)
TGSMOverages[F][&GV] = FirstUser;
} else if (M.IsPatchConstantShader(F)) {
// Collect illegal usage for error reporting
if (TGSMOverages[F].count(&GV) == 0)
TGSMOverages[F][&GV] = FirstUser;
}
}
}
}
TGSMSize += DL.getTypeAllocSize(GV.getType()->getElementType());
CollectFixAddressAccess(&GV, FixAddrTGSMList);
}
}

ValidationRule Rule = ValidationRule::SmMaxTGSMSize;
unsigned MaxSize = DXIL::kMaxTGSMSize;

if (M.GetShaderModel()->IsMS()) {
Rule = ValidationRule::SmMaxMSSMSize;
MaxSize = DXIL::kMaxMSSMSize;
}

// Check if the entry function has attribute to override TGSM size.
if (M.HasDxilEntryProps(M.GetEntryFunction())) {
DxilEntryProps &EntryProps = M.GetDxilEntryProps(M.GetEntryFunction());
if (EntryProps.props.IsCS()) {
unsigned SpecifiedTGSMSize = EntryProps.props.groupSharedLimitBytes;
if (SpecifiedTGSMSize > 0) {
MaxSize = SpecifiedTGSMSize;
}
if (pSM->IsLib()) {
for (auto &F : M.GetModule()->functions()) {
if (F.isDeclaration() ||
!(M.HasDxilEntryProps(&F) || M.IsPatchConstantShader(&F)))
continue;
ReportTGSMOverages(&F);
}
}

if (TGSMSize > MaxSize) {
Module::global_iterator GI = M.GetModule()->global_end();
GlobalVariable *GV = &*GI;
do {
GI--;
GV = &*GI;
if (GV->getType()->getAddressSpace() == hlsl::DXIL::kTGSMAddrSpace)
break;
} while (GI != M.GetModule()->global_begin());
ValCtx.EmitGlobalVariableFormatError(
GV, Rule, {std::to_string(TGSMSize), std::to_string(MaxSize)});
} else {
Function *EntryFunc = M.GetEntryFunction();
if (EntryFunc)
ReportTGSMOverages(EntryFunc);
if (pSM->IsHS())
ReportTGSMOverages(M.GetPatchConstantFunction());
}

if (!FixAddrTGSMList.empty()) {
Expand Down
9 changes: 2 additions & 7 deletions tools/clang/lib/CodeGen/CGHLSLMS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1653,13 +1653,8 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
FD->getAttr<HLSLGroupSharedLimitAttr>()) {
funcProps->groupSharedLimitBytes = Attr->getLimit();
} else {
if (SM->IsMS()) { // Fallback to default limits
funcProps->groupSharedLimitBytes = DXIL::kMaxMSSMSize; // 28k For MS
} else if (SM->IsAS() || SM->IsCS()) {
funcProps->groupSharedLimitBytes = DXIL::kMaxTGSMSize; // 32k For AS/CS
} else {
funcProps->groupSharedLimitBytes = 0;
}
funcProps->groupSharedLimitBytes =
DxilFunctionProps::kGroupSharedLimitUnset; // not specified
}

// Hull shader.
Expand Down
2 changes: 1 addition & 1 deletion tools/clang/test/CodeGenHLSL/mesh-val/oversizeSM.hlsl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: %dxc -E main -T ms_6_5 %s | FileCheck %s

// CHECK: Total Thread Group Shared Memory storage is 28676, exceeded 28672
// CHECK: Total Thread Group Shared Memory used by 'main' is 28676, exceeding maximum: 28672.

#define MAX_VERT 32
#define MAX_PRIM 16
Expand Down
Loading