Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 5 additions & 26 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ void OpDispatchBuilder::RETOp(OpcodeArgs) {
if (CTX->Config.ABILocalFlags) {
_InvalidateFlags(~0UL); // all flags
InvalidatePF_AF();
// Deferred flags are invalidated now
InvalidateDeferredFlags();
}

Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));
Expand Down Expand Up @@ -557,8 +555,6 @@ void OpDispatchBuilder::CALLOp(OpcodeArgs) {
if (CTX->Config.ABILocalFlags) {
_InvalidateFlags(~0UL); // all flags
InvalidatePF_AF();
// Deferred flags are invalidated now
InvalidateDeferredFlags();
}

auto ConstantPC = GetRelocatedPC(Op);
Expand Down Expand Up @@ -2029,8 +2025,6 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
Ref Res = _Lshr(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

InvalidateDeferredFlags();

// Constant folded version of the above, with fused shifts.
if (Const > 1) {
Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const);
Expand Down Expand Up @@ -2254,8 +2248,6 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
Ref Res = _Lshl(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

InvalidateDeferredFlags();

// Res |= (Src << (Size - Shift + 1));
if (Const > 1) {
Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const);
Expand Down Expand Up @@ -2384,9 +2376,6 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
const uint32_t Size = GetDstBitSize(Op);
const uint32_t Mask = Size - 1;

// Deferred flags are invalidated now
InvalidateDeferredFlags();

if (IsNonconstant) {
// Because we mask explicitly with And/Bfe/Sbfe after, we can allow garbage here.
Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true});
Expand Down Expand Up @@ -2416,7 +2405,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
Value = _Lshr(IR::SizeToOpSize(LshrSize), Value, BitSelect);
}

SetCFDirect(Value, ConstantShift, true);
SetCFDirect_InvalidateNZV(Value, ConstantShift, Value);
}

switch (Action) {
Expand Down Expand Up @@ -2449,7 +2438,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
Value = Dest;
}

SetCFInverted(Value, ConstantShift, true);
SetCFInverted_InvalidateNZV(Value, ConstantShift, true);
StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
break;
}
Expand Down Expand Up @@ -2855,14 +2844,11 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) {
}

void OpDispatchBuilder::AAAOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto A = LoadGPRRegister(X86State::REG_RAX);
auto AF = CalculateAFForDecimal(A);

// CF = AF, OF/SF/ZF/PF undefined
ZeroNZCV();
SetCFDirect(AF);
SetCFDirect_InvalidateNZV(AF);
SetAFAndFixup(AF);
CalculateDeferredFlags();

Expand All @@ -2875,14 +2861,11 @@ void OpDispatchBuilder::AAAOp(OpcodeArgs) {
}

void OpDispatchBuilder::AASOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto A = LoadGPRRegister(X86State::REG_RAX);
auto AF = CalculateAFForDecimal(A);

// CF = AF, OF/SF/ZF/PF undefined
ZeroNZCV();
SetCFDirect(AF);
SetCFDirect_InvalidateNZV(AF);
SetAFAndFixup(AF);
CalculateDeferredFlags();

Expand All @@ -2895,8 +2878,6 @@ void OpDispatchBuilder::AASOp(OpcodeArgs) {
}

void OpDispatchBuilder::AAMOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto AL = LoadGPRRegister(X86State::REG_RAX, OpSize::i8Bit);
auto Imm8 = _Constant(Op->Src[0].Data.Literal.Value & 0xFF);
auto UDivOp = _UDiv(OpSize::i64Bit, AL, Imm8);
Expand All @@ -2910,8 +2891,6 @@ void OpDispatchBuilder::AAMOp(OpcodeArgs) {
}

void OpDispatchBuilder::AADOp(OpcodeArgs) {
InvalidateDeferredFlags();

auto A = LoadGPRRegister(X86State::REG_RAX);
auto AH = _Lshr(OpSize::i32Bit, A, _Constant(8));
auto Imm8 = _Constant(Op->Src[0].Data.Literal.Value & 0xFF);
Expand Down Expand Up @@ -4876,7 +4855,7 @@ void OpDispatchBuilder::RDRANDOp(OpcodeArgs) {
SetCFInverted(CF_inv);
} else {
// Accelerated path. Invalid is 0 or 1, so set NZCV with a single rmif.
HandleNZCVWrite(1u << 29 /* C */);
HandleNZCVWrite();
_RmifNZCV(CF_inv, (64 - 1) /* rotate bit 0 into bit 1 = C */, 0xf);
CFInverted = true;
}
Expand Down
93 changes: 47 additions & 46 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ class OpDispatchBuilder final : public IREmitter {
// If we loaded flags but didn't change them, invalidate the cached copy and move on.
// Changes get stored out by CalculateDeferredFlags.
CachedNZCV = nullptr;
PossiblySetNZCVBits = ~0U;
CFInverted = CFInvertedABI;
FlushRegisterCache();

Expand Down Expand Up @@ -1329,7 +1328,6 @@ class OpDispatchBuilder final : public IREmitter {

Ref CachedNZCV {};
bool NZCVDirty {};
uint32_t PossiblySetNZCVBits {};

// Set if the host carry is inverted from the guest carry. This is set after
// subtraction, because arm64 and x86 have inverted borrow flags, but clear
Expand Down Expand Up @@ -1579,31 +1577,26 @@ class OpDispatchBuilder final : public IREmitter {
return IR::SizeToOpSize(GetSrcSize(Op));
}

// Set flag tracking to prepare for an operation that directly writes NZCV. If
// some bits are known to be zeroed, the PossiblySetNZCVBits mask can be
// passed. Otherwise, it defaults to assuming all bits may be set after
// (this is conservative).
void HandleNZCVWrite(uint32_t _PossiblySetNZCVBits = ~0) {
InvalidateDeferredFlags();
// Set flag tracking to prepare for an operation that directly writes NZCV.
void HandleNZCVWrite() {
CachedNZCV = nullptr;
PossiblySetNZCVBits = _PossiblySetNZCVBits;
NZCVDirty = false;
}

// Set flag tracking to prepare for a read-modify-write operation on NZCV.
void HandleNZCV_RMW(uint32_t _PossiblySetNZCVBits = ~0) {
void HandleNZCV_RMW() {
CalculateDeferredFlags();

if (NZCVDirty && CachedNZCV) {
_StoreNZCV(CachedNZCV);
}

HandleNZCVWrite(_PossiblySetNZCVBits);
HandleNZCVWrite();
}

// Special case of the above where we are known to zero C/V
void HandleNZ00Write() {
HandleNZCVWrite((1u << 31) | (1u << 30));
HandleNZCVWrite();

// Host carry will be implicitly zeroed, and we want guest carry zeroed as
// well. So do not invert.
Expand All @@ -1625,7 +1618,6 @@ class OpDispatchBuilder final : public IREmitter {

// Replace the cached NZCV value with a constant zero and mark it dirty.
// NOTE(review): in this diff view the PossiblySetNZCVBits line appears to be
// one of the lines being removed along with that member - confirm against the
// final file before relying on it.
void ZeroNZCV() {
// The cached copy now holds a freshly created constant 0.
CachedNZCV = _Constant(0);
// With everything zeroed, no NZCV bit can possibly be set.
PossiblySetNZCVBits = 0;
// The cached value has been modified and is flagged dirty.
NZCVDirty = true;
}

Expand All @@ -1643,7 +1635,6 @@ class OpDispatchBuilder final : public IREmitter {
_SubNZCV(SrcSize, Res, _Constant(0));
}

PossiblySetNZCVBits |= 1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_RAW_LOC);
CFInverted = true;
} else {
_TestNZ(SrcSize, Res, Res);
Expand All @@ -1662,13 +1653,8 @@ class OpDispatchBuilder final : public IREmitter {
void InsertNZCV(unsigned BitOffset, Ref Value, signed FlagOffset, bool MustMask) {
signed Bit = IndexNZCV(BitOffset);

// If NZCV is not dirty, we always want to use rmif, it's 1 instruction to
// implement this. But if NZCV is dirty, it might still be cheaper to copy
// the GPR flags to NZCV and rmif. This is a heuristic for cases where we
// expect that 2 instruction sequence to be a win (versus something like
// bfe+mov+bfi+mov which can happen with our RA..). It's not totally
// conservative but it's pretty good in practice.
bool PreferRmif = !NZCVDirty || FlagOffset || MustMask || (PossiblySetNZCVBits & (1u << Bit));
// Heuristic to choose rmif vs msr.
bool PreferRmif = !NZCVDirty || FlagOffset || MustMask;

if (CTX->HostFeatures.SupportsFlagM && PreferRmif) {
// Update NZCV
Expand All @@ -1689,16 +1675,8 @@ class OpDispatchBuilder final : public IREmitter {
Value = _Bfe(OpSize::i64Bit, 1, FlagOffset, Value);
}

if (PossiblySetNZCVBits == 0) {
SetNZCV(_Lshl(OpSize::i64Bit, Value, _Constant(Bit)));
} else if ((PossiblySetNZCVBits & (1u << Bit)) == 0) {
SetNZCV(_Orlshl(OpSize::i32Bit, GetNZCV(), Value, Bit));
} else {
SetNZCV(_Bfi(OpSize::i32Bit, 1, Bit, GetNZCV(), Value));
}
SetNZCV(_Bfi(OpSize::i32Bit, 1, Bit, GetNZCV(), Value));
}

PossiblySetNZCVBits |= (1u << Bit);
}

// If we don't care about N/C/V and just need Z, we can test with a simple
Expand Down Expand Up @@ -1737,7 +1715,6 @@ class OpDispatchBuilder final : public IREmitter {

// Toggle the CFInverted tracking flag.
// NOTE(review): in this diff view the PossiblySetNZCVBits line appears to be
// one of the lines being removed along with that member - confirm against the
// final file.
void CarryInvert() {
// Flip the recorded carry polarity.
CFInverted ^= true;
// Record that the NZCV bit selected by IndexNZCV for CF may now be set.
PossiblySetNZCVBits |= 1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_RAW_LOC);
}

template<unsigned BitOffset>
Expand All @@ -1751,6 +1728,44 @@ class OpDispatchBuilder final : public IREmitter {
CFInverted = true;
}

// Writes the guest carry flag from a single-bit value by performing a
// flag-setting subtraction (0 - x) and recording that the carry is stored
// inverted. The subtraction is issued through _SubNZCV, so callers should
// treat N/Z/V as invalidated afterwards.
//
// Why 0 - x: it is evaluated as 0 + (~x) + 1 in 32 bits, hence
//   x = 0  ->  0 + 0xffffffff + 1 = 0x100000000, carry out (hardware C set)
//   x = 1  ->  0 + 0xfffffffe + 1 = 0x0ffffffff, no carry (hardware C clear)
// i.e. hardware C ends up as the complement of x, which is why CFInverted is
// set to true below.
//
// Value:       source holding the carry bit.
// ValueOffset: bit position of the carry bit inside Value.
// MustMask:    set when bits other than the selected one may be garbage.
void SetCFDirect_InvalidateNZV(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
  // Isolate the requested bit unless it is already a clean bit 0.
  if (MustMask || ValueOffset != 0) {
    Value = _Bfe(OpSize::i64Bit, 1, ValueOffset, Value);
  }

  // Reset NZCV tracking before the direct flag write.
  HandleNZCVWrite();

  auto Zero = _Constant(0);
  _SubNZCV(OpSize::i32Bit, Zero, Value);

  // Hardware carry now holds the inverse of the guest carry.
  CFInverted = true;
}

// Writes the hardware carry flag so that it equals the given single-bit
// value, and records that the carry is stored inverted (CFInverted = true).
// Callers should treat N/Z/V as invalidated afterwards.
//
// On hosts with FlagM the work is delegated to SetCFInverted, which the
// original author notes becomes a single rmif. Otherwise a flag-setting
// subtraction x - 1 is used:
//   x = 0  ->  hardware C is not set
//   x = 1  ->  hardware C is set
//
// Value:       source holding the bit.
// ValueOffset: bit position of that bit inside Value.
// MustMask:    set when bits other than the selected one may be garbage.
void SetCFInverted_InvalidateNZV(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
  if (CTX->HostFeatures.SupportsFlagM) {
    // FlagM path: insert the bit directly.
    SetCFInverted(Value, ValueOffset, MustMask);
    return;
  }

  // No FlagM: derive the carry arithmetically instead.
  if (MustMask || ValueOffset != 0) {
    Value = _Bfe(OpSize::i64Bit, 1, ValueOffset, Value);
  }

  // Reset NZCV tracking before the direct flag write.
  HandleNZCVWrite();
  _SubNZCV(OpSize::i32Bit, Value, _InlineConstant(1));
  CFInverted = true;
}

void SetCFInverted(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
SetRFLAG(Value, X86State::RFLAG_CF_RAW_LOC, ValueOffset, MustMask);
CFInverted = true;
Expand Down Expand Up @@ -2010,9 +2025,7 @@ class OpDispatchBuilder final : public IREmitter {
Invert ^= CFInverted;
}

if (!(PossiblySetNZCVBits & (1u << IndexNZCV(BitOffset)))) {
return _Constant(Invert ? 1 : 0);
} else if (NZCVDirty) {
if (NZCVDirty) {
auto Value = _Bfe(OpSize::i32Bit, 1, IndexNZCV(BitOffset), GetNZCV());

if (Invert) {
Expand Down Expand Up @@ -2119,7 +2132,6 @@ class OpDispatchBuilder final : public IREmitter {
//
// Our AXFlag emulation on FlagM2-less systems needs V_inv passed.
_AXFlag(CTX->HostFeatures.SupportsFlagM2 ? Invalid() : V_inv);
PossiblySetNZCVBits = ~0;
CFInverted = true;
}

Expand All @@ -2133,7 +2145,6 @@ class OpDispatchBuilder final : public IREmitter {

// Convert to x86 flags, saves us from or'ing after.
_AXFlag(Invalid());
PossiblySetNZCVBits = ~0;
CFInverted = true;

// Copy the values.
Expand Down Expand Up @@ -2241,14 +2252,6 @@ class OpDispatchBuilder final : public IREmitter {
*/
void CalculateDeferredFlags();

/**
* @brief Invalidates NZCV. Mostly vestigial.
*
* The body only clears the PossiblySetNZCVBits tracking mask; it contains no
* other statements.
* NOTE(review): this diff view suggests the whole helper is being deleted
* together with PossiblySetNZCVBits - confirm against the final file.
*/
void InvalidateDeferredFlags() {
// No NZCV bits will be set, they are all invalid.
PossiblySetNZCVBits = 0;
}

void ZeroShiftResult(FEXCore::X86Tables::DecodedOp Op) {
// In the case of zero-rotate, we need to store the destination still to deal with 32-bit semantics.
const auto Size = OpSizeFromSrc(Op);
Expand Down Expand Up @@ -2277,7 +2280,6 @@ class OpDispatchBuilder final : public IREmitter {
}

// Otherwise, prepare to branch.
uint32_t OldSetNZCVBits = PossiblySetNZCVBits;
auto Zero = _Constant(0);

// If the shift is zero, do not touch the flags.
Expand Down Expand Up @@ -2312,7 +2314,6 @@ class OpDispatchBuilder final : public IREmitter {

SetCurrentCodeBlock(EndBlock);
StartNewBlock();
PossiblySetNZCVBits |= OldSetNZCVBits;
}

/**
Expand Down
5 changes: 0 additions & 5 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2226,8 +2226,6 @@ void OpDispatchBuilder::AVX128_VPERM2(OpcodeArgs) {

template<IR::OpSize ElementSize>
void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs) {
InvalidateDeferredFlags();

const auto Size = GetSrcSize(Op);
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;

Expand Down Expand Up @@ -2296,9 +2294,6 @@ void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs) {
}

void OpDispatchBuilder::AVX128_PTest(OpcodeArgs) {
// Invalidate deferred flags early
InvalidateDeferredFlags();

const auto Size = GetSrcSize(Op);
const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;

Expand Down
4 changes: 0 additions & 4 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,9 @@ void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, Ref Src) {
size_t NumFlags = FlagOffsets.size();
if (Lower8) {
// Calculate flags early.
// Could use InvalidateDeferredFlags() if we had masked invalidation.
// This is only a partial overwrite of flags since OF isn't stored here.
CalculateDeferredFlags();
NumFlags = 5;
} else {
// We are overwriting all RFLAGS. Invalidate the deferred flag state.
InvalidateDeferredFlags();
}

// PF and CF are both stored inverted, so hoist the invert.
Expand Down
Loading
Loading