Skip to content

Commit

Permalink
VOPC true16
Browse files Browse the repository at this point in the history
  • Loading branch information
broxigarchen committed Jan 10, 2025
1 parent d6b6598 commit bc11a5d
Show file tree
Hide file tree
Showing 32 changed files with 2,146 additions and 940 deletions.
19 changes: 17 additions & 2 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,9 +668,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,

if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
convertVOP3PDPPInst(MI);
else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
AMDGPU::isVOPC64DPP(MI.getOpcode()))
else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI); // Special VOP3 case
else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
convertVOPC64DPPInst(MI); // Special VOP3 case
else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
-1)
convertDPP8Inst(MI);
Expand Down Expand Up @@ -1254,6 +1255,20 @@ void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
AMDGPU::OpName::src1_modifiers);
}

/// Post-decode fixup for a VOPC64 DPP instruction.
///
/// Runs the true16 op_sel conversion on \p MI, then materializes an explicit
/// op_sel immediate when the decoded instruction carries fewer operands than
/// its descriptor declares and the opcode has a named op_sel operand. The
/// immediate's value is the OpSel field of the collected VOP modifiers.
void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
  // Capture the opcode and the descriptor's operand count up front, before
  // the true16 conversion gets a chance to touch MI.
  const unsigned Opcode = MI.getOpcode();
  const unsigned ExpectedNumOps = MCII->get(Opcode).getNumOperands();

  convertTrue16OpSel(MI);

  // Nothing to add if the instruction already has all declared operands.
  if (MI.getNumOperands() >= ExpectedNumOps)
    return;

  // Only opcodes that declare an op_sel operand need one inserted.
  if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel))
    return;

  const auto Modifiers = collectVOPModifiers(MI);
  insertNamedMCOperand(MI, MCOperand::createImm(Modifiers.OpSel),
                       AMDGPU::OpName::op_sel);
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
assert(HasLiteral && "Should have decoded a literal");
const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ class AMDGPUDisassembler : public MCDisassembler {
void convertVOP3DPPInst(MCInst &MI) const;
void convertVOP3PDPPInst(MCInst &MI) const;
void convertVOPCDPPInst(MCInst &MI) const;
void convertVOPC64DPPInst(MCInst &MI) const;
void convertMacDPPInst(MCInst &MI) const;
void convertTrue16OpSel(MCInst &MI) const;

Expand Down
190 changes: 129 additions & 61 deletions llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -89,23 +89,57 @@ multiclass VOPC_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType
def _t16 : VOPC_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
let HasOpSel = 1;
let HasModifiers = 1; // All instructions at least have OpSel
let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;

let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
}
def _fake16: VOPC_Profile<sched, vt0, vt1> {
let IsTrue16 = 1;
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
let Src0VOP3DPP = VGPRSrc_32;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;

let DstRC64 = getVALUDstForVT<DstVT>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
}

Expand Down Expand Up @@ -283,7 +317,9 @@ class getVOPCPat64 <SDPatternOperator cond, VOPProfile P> : LetDummies {
(setcc (P.Src0VT
!if(P.HasOMod,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
!if(P.HasClamp,
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
(VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers)))),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
Expand Down Expand Up @@ -324,6 +360,10 @@ multiclass VOPC_Pseudos <string opName,
let SchedRW = P.Schedule;
let isCompare = 1;
let isCommutable = 1;
let AsmMatchConverter =
!if (P.HasOpSel, "cvtVOP3OpSel",
!if (!or(P.HasModifiers, P.HasOMod, P.HasIntClamp), "cvtVOP3",
""));
}

if P.HasExtSDWA then
Expand Down Expand Up @@ -1344,29 +1384,9 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>

// VOPC64

class VOPC64_DPP_Base<bits<10> op, string OpName, VOPProfile P>
: VOP3_DPP_Base<OpName, P, 1>, VOP3_DPPe_Common<op, P> {
class VOPC64_DPP<VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP_Base<opName, ps.Pfl, 1> {
Instruction Opcode = !cast<Instruction>(NAME);

bits<8> src0;
bits<9> dpp_ctrl;
bits<1> bound_ctrl;
bits<4> bank_mask;
bits<4> row_mask;
bit fi;

let Inst{40-32} = 0xfa;
let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{80-72} = dpp_ctrl;
let Inst{82} = fi;
let Inst{83} = bound_ctrl;
// Inst{87-84} ignored by hw
let Inst{91-88} = bank_mask;
let Inst{95-92} = row_mask;
}

class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP_Base<op, opName, ps.Pfl> {
let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
let True16Predicate = ps.True16Predicate;
Expand All @@ -1380,32 +1400,35 @@ class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>

class VOPC64_DPP16_Dst<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
: VOPC64_DPP16<op, ps, opName> {
: VOPC64_DPP<ps, opName>, VOP3_DPP_Enc<op, ps.Pfl, 1> {
bits<8> sdst;
let Inst{7-0} = sdst;
}

class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
: VOPC64_DPP16<op, ps, opName> {
: VOPC64_DPP<ps, opName>, VOP3_DPP_Enc<op, ps.Pfl, 1> {
let Inst{7-0} = ? ;
}

class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P>
: VOP3_DPP8_Base<OpName, P>, VOP3_DPPe_Common<op, P> {
Instruction Opcode = !cast<Instruction>(NAME);

bits<8> src0;
bits<24> dpp8;
bits<9> fi;
// VOPC64 DPP16 instruction class with a destination, for the t16 variant.
// Combines VOPC64_DPP (instantiated from the DPP pseudo `ps`) with the
// VOP3_DPP_Enc_t16 encoding for opcode `op` and profile `ps.Pfl`.
class VOPC64_DPP16_Dst_t16<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
: VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
// 8-bit destination field, encoded in the low byte of the instruction word.
bits<8> sdst;
let Inst{7-0} = sdst;
// Bit 14 is forced to zero for this form.
// NOTE(review): presumably this is the destination op_sel bit of the t16
// VOP3 layout -- confirm against VOP3_DPP_Enc_t16.
let Inst{14} = 0;
}

let Inst{40-32} = fi;
let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{95-72} = dpp8{23-0};
// VOPC64 DPP16 instruction class for the t16 variant that declares no sdst
// field. Combines VOPC64_DPP (instantiated from the DPP pseudo `ps`) with the
// VOP3_DPP_Enc_t16 encoding for opcode `op` and profile `ps.Pfl`.
class VOPC64_DPP16_NoDst_t16<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
: VOPC64_DPP<ps, opName>, VOP3_DPP_Enc_t16<op, ps.Pfl, 1> {
// The low byte is left as TableGen's unset value (`?`); no operand is
// encoded there in this form.
let Inst{7-0} = ? ;
// Bit 14 is forced to zero for this form.
// NOTE(review): presumably this is the destination op_sel bit of the t16
// VOP3 layout -- confirm against VOP3_DPP_Enc_t16.
let Inst{14} = 0;
}

class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP8_Base<op, opName, ps.Pfl> {
class VOPC64_DPP8<VOP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP8_Base<opName, ps.Pfl> {
Instruction Opcode = !cast<Instruction>(NAME);
// Note ps is the non-dpp pseudo
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
Expand All @@ -1416,15 +1439,30 @@ class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
}

class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP8<op, ps, opName> {
: VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl> {
bits<8> sdst;
let Inst{7-0} = sdst;
let Constraints = "";
}

class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP8<op, ps, opName> {
: VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc<op, ps.Pfl> {
let Inst{7-0} = ? ;
let Constraints = "";
}

// VOPC64 DPP8 instruction class with a destination, for the t16 variant.
// Combines VOPC64_DPP8 (instantiated from the pseudo `ps`) with the
// VOP3_DPP8_Enc_t16 encoding for opcode `op` and profile `ps.Pfl`.
class VOPC64_DPP8_Dst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
// 8-bit destination field, encoded in the low byte of the instruction word.
bits<8> sdst;
let Inst{7-0} = sdst;
// Bit 14 is forced to zero for this form.
// NOTE(review): presumably this is the destination op_sel bit of the t16
// VOP3 layout -- confirm against VOP3_DPP8_Enc_t16.
let Inst{14} = 0;
// Clear any operand constraints inherited from the base classes.
let Constraints = "";
}

// VOPC64 DPP8 instruction class for the t16 variant that declares no sdst
// field. Combines VOPC64_DPP8 (instantiated from the pseudo `ps`) with the
// VOP3_DPP8_Enc_t16 encoding for opcode `op` and profile `ps.Pfl`.
class VOPC64_DPP8_NoDst_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP8<ps, opName>, VOP3_DPP8_Enc_t16<op, ps.Pfl> {
// The low byte is left as TableGen's unset value (`?`); no operand is
// encoded there in this form.
let Inst{7-0} = ? ;
// Bit 14 is forced to zero for this form.
// NOTE(review): presumably this is the destination op_sel bit of the t16
// VOP3 layout -- confirm against VOP3_DPP8_Enc_t16.
let Inst{14} = 0;
// Clear any operand constraints inherited from the base classes.
let Constraints = "";
}

Expand All @@ -1442,7 +1480,7 @@ multiclass VOPC_Real_Base<GFXGen Gen, bits<9> op> {
defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
def _e32#Gen.Suffix : VOPC_Real<ps32, Gen.Subtarget>,
VOPCe<op{7-0}>;
def _e64#Gen.Suffix : VOP3_Real<ps64, Gen.Subtarget>,
def _e64#Gen.Suffix : VOP3_Real_Gen<ps64, Gen>,
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
Expand Down Expand Up @@ -1508,13 +1546,25 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,
// the destination-less 32bit forms add it to the asmString here.
VOPC_Real<ps32, Gen.Subtarget, asm_name#"_e32">,
VOPCe<op{7-0}>;
def _e64#Gen.Suffix :
VOP3_Real_Gen<ps64, Gen, asm_name>,
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
bits<8> sdst;
let Inst{7-0} = sdst;
if ps64.Pfl.IsRealTrue16 then {
def _e64#Gen.Suffix :
VOP3_Real_Gen<ps64, Gen, asm_name>,
VOP3e_t16_gfx11_gfx12<{0, op}, ps64.Pfl> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
bits<8> sdst;
let Inst{7-0} = sdst;
let Inst{14} = 0;
}
} else {
def _e64#Gen.Suffix :
VOP3_Real_Gen<ps64, Gen, asm_name>,
VOP3a_gfx11_gfx12<{0, op}, ps64.Pfl> {
// Encoding used for VOPC instructions encoded as VOP3 differs from
// VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
bits<8> sdst;
let Inst{7-0} = sdst;
}
}

defm : VOPCInstAliases<OpName, !substr(Gen.Suffix, 1), NAME, asm_name>;
Expand Down Expand Up @@ -1554,9 +1604,15 @@ multiclass VOPC_Real_with_name<GFXGen Gen, bits<9> op, string OpName,

if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
if ps64.Pfl.IsRealTrue16 then {
def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst_t16<{0, op}, psDPP, asm_name>,
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst_t16<{0, op}, ps64, asm_name>;
} else {
def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>,
SIMCInstr<psDPP.PseudoInstr, Gen.Subtarget>;
def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>;
}
} // end if ps64.Pfl.HasExtVOP3DPP
} // End DecoderNamespace
} // End AssemblerPredicate
Expand Down Expand Up @@ -1693,11 +1749,23 @@ multiclass VOPC_Real_t16_gfx11<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;

// Instantiates VOPC_Real_t16_gfx11 twice for one opcode: once under the
// "_t16" suffix and once under the "_fake16" suffix. Each instantiation gets
// the same `op`, `asm_name` and `pseudo_mnemonic`, and an OpName with the
// matching "_t16" / "_fake16" suffix appended.
multiclass VOPC_Real_t16_and_fake16_gfx11<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> {
defm _t16: VOPC_Real_t16_gfx11<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
defm _fake16: VOPC_Real_t16_gfx11<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
}

// Convenience multiclass that inherits VOPC_Real_t16 for both GFX11Gen and
// GFX12Gen, forwarding the same opcode, assembly name, OpName and pseudo
// mnemonic to each.
multiclass VOPC_Real_t16_gfx11_gfx12<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPC_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPC_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;

// Instantiates VOPC_Real_t16_gfx11_gfx12 twice for one opcode: once under the
// "_t16" suffix and once under the "_fake16" suffix. Each instantiation gets
// the same `op`, `asm_name` and `pseudo_mnemonic`, and an OpName with the
// matching "_t16" / "_fake16" suffix appended.
multiclass VOPC_Real_t16_and_fake16_gfx11_gfx12<bits <9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> {
defm _t16: VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_t16", pseudo_mnemonic>;
defm _fake16: VOPC_Real_t16_gfx11_gfx12<op, asm_name, OpName#"_fake16", pseudo_mnemonic>;
}

// Convenience multiclass that inherits VOPCX_Real_t16 with the generation
// fixed to GFX11Gen, forwarding the opcode, assembly name, OpName and pseudo
// mnemonic unchanged.
multiclass VOPCX_Real_t16_gfx11<bits<9> op, string asm_name,
string OpName = NAME, string pseudo_mnemonic = ""> :
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>;
Expand All @@ -1708,7 +1776,7 @@ multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;

defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_LT_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
defm V_CMP_LE_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
defm V_CMP_GT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2033,7 +2033,7 @@ def VOP2InfoTable : VOPInfoTable<"VOP2">;
def VOP3InfoTable : VOPInfoTable<"VOP3">;

class VOPC64Table <string Format> : GenericTable {
let FilterClass = "VOPC64_" # Format # "_Base";
let FilterClass = "VOPC64_" # Format;
let CppTypeName = "VOPC64DPPInfo";
let Fields = ["Opcode"];

Expand Down
Loading

0 comments on commit bc11a5d

Please sign in to comment.