Skip to content

Commit

Permalink
[DYNAREC] Improved x87 comparison (backported from Box64)
Browse files Browse the repository at this point in the history
  • Loading branch information
ptitSeb committed Dec 4, 2024
1 parent c9d20f6 commit f1559b0
Show file tree
Hide file tree
Showing 11 changed files with 76 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/dynarec/dynarec_arm_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOVD(d1, v0);
}
VCMP_F32(d1*2, s0);
FCOMI(x1, x2);
FCOMI(x1, x2, 0, 0, d1*2, s0, 1);
break;

case 0x31:
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/dynarec_arm_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
GETEX(q0, 0);
VCMP_F64(v0, q0);
FCOMI(x1, x2);
FCOMI(x1, x2, 0, 0, v0, q0, 0);
break;

case 0x38: // SSSE3 opcodes
Expand Down
8 changes: 4 additions & 4 deletions src/dynarec/dynarec_arm_d8.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
break;
case 0xD8:
case 0xD9:
Expand All @@ -118,7 +118,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
Expand Down Expand Up @@ -275,7 +275,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCVT_F64_F32(d1, s0);
VCMP_F64(v1, d1);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, ST_IS_F(0)?s0:d1, ST_IS_F(0));
break;
case 3:
INST_NAME("FCOMP ST0, float[ED]");
Expand All @@ -296,7 +296,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCVT_F64_F32(d1, s0);
VCMP_F64(v1, d1);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, ST_IS_F(0)?s0:d1, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/dynarec_arm_d9.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64_0(v1);
}
FCOM(x1, x2); // same flags...
FCOM(x1, x2, 0, 0, v1, 0, ST_IS_F(0)); // same flags...
break;
case 0xE5:
INST_NAME("FXAM");
Expand Down
6 changes: 3 additions & 3 deletions src/dynarec/dynarec_arm_da.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
Expand Down Expand Up @@ -185,7 +185,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOVtoV(s0, ed);
VCVT_F64_S32(d0, s0);
VCMP_F64(v1, d0);
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, d0, 0);
break;
case 3:
INST_NAME("FICOMP ST0, Ed");
Expand All @@ -196,7 +196,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOVtoV(s0, ed);
VCVT_F64_S32(d0, s0);
VCMP_F64(v1, d0);
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, d0, 0);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
Expand Down
4 changes: 2 additions & 2 deletions src/dynarec/dynarec_arm_db.c
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
break;
case 0xF0:
case 0xF1:
Expand All @@ -168,7 +168,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
break;

case 0xE0:
Expand Down
8 changes: 4 additions & 4 deletions src/dynarec/dynarec_arm_dc.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
break;
case 0xD8:
case 0xD9:
Expand All @@ -114,7 +114,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
Expand Down Expand Up @@ -258,7 +258,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOVtoV_D(d1, x2, x3);
}
VCMP_F64(v1, d1);
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, d1, 0);
break;
case 3:
INST_NAME("FCOMP ST0, double[ED]");
Expand All @@ -275,7 +275,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOVtoV_D(d1, x2, x3);
}
VCMP_F64(v1, d1);
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, d1, 0);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
Expand Down
4 changes: 2 additions & 2 deletions src/dynarec/dynarec_arm_dd.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
break;
case 0xE8:
case 0xE9:
Expand All @@ -129,7 +129,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

Expand Down
4 changes: 2 additions & 2 deletions src/dynarec/dynarec_arm_de.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

Expand All @@ -107,7 +107,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
Expand Down
4 changes: 2 additions & 2 deletions src/dynarec/dynarec_arm_df.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xF0:
Expand All @@ -111,7 +111,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
} else {
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

Expand Down
56 changes: 54 additions & 2 deletions src/dynarec/dynarec_arm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,25 +316,77 @@
#define IFXN(A, B) if((dyn->insts[ninst].x86.gen_flags&(A) && !(dyn->insts[ninst].x86.gen_flags&(B))))

// Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
// Emits code that converts the flags of a preceding VCMP into condition bits of the
// emulated status word (x86emu_t.sw), then stores the word back.
//   s1      scratch core reg: holds the status word while it is being rebuilt
//   s2, s3  scratch core regs: receive the raw bits of v1/v2 (s3 only on the non-float path)
//   s4      scratch core reg for the infinity bit pattern; when 0 the whole infinity
//           fix-up is not emitted (callers that pass 0 also pass 0 for s3)
//   v1, v2  the VFP registers that were compared by the VCMP
//   is_f    selects the single-register move (VMOVfrV) instead of the two-register move
//           (VMOVfrV_D); presumably non-zero when v1/v2 hold single-precision values
// NOTE(review): the relative branch offsets below are hand-counted and appear to assume
// that every emitter macro in the fix-up produces exactly one instruction - confirm
// before adding, removing or reordering any line.
#define FCOM(s1, s2) \
#define FCOM(s1, s2, s3, s4, v1, v2, is_f) \
VMRS_APSR(); /* 0b0100011100000000 */ \
LDRH_IMM8(s1, xEmu, offsetof(x86emu_t, sw)); /*offset is 8bits right?*/ \
BIC_IMM8(s1, s1, 0b01000111, 12); /* clear every bit the three conditional ORRs below may set */ \
ORR_IMM8_COND(cVS, s1, s1, 0b01000101, 12); /* unordered */ \
ORR_IMM8_COND(cEQ, s1, s1, 0b01000000, 12); /* equal */ \
ORR_IMM8_COND(cMI, s1, s1, 0b00000001, 12); /* less than */ \
/* greater than leave 0 */ \
if(s4) { \
Bcond(cVS, (is_f?11:13)*4-8); /* unordered: skip the fix-up (11 or 13 instructions ahead, which should be the final STRH) */ \
if(is_f) { \
MOVW(s4, 0); /* NOTE(review): 0x7ff00000 is the high word of a binary64 +inf; binary32 +inf is 0x7f800000 - confirm this pattern is intended here */ \
MOVT(s4, 0x7ff0); /* +inf */ \
VMOVfrV(s2, v1); \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
Bcond(cEQ, 5*4-8); /* same */ \
VMOVfrV(s2, v2); \
ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
} else { \
MOVW(s4, 0); \
MOVT(s4, 0x7ff0); /* +inf */ \
VMOVfrV_D(s2, s3, v1); \
ORR_REG_LSL_IMM5(s2, s2, s3, 0); /* OR the two halves together so one 32-bit compare can be used */ \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
Bcond(cEQ, 6*4-8); /* same */ \
VMOVfrV_D(s2, s3, v2); \
ORR_REG_LSL_IMM5(s2, s2, s3, 0); /* same halves-merge as for v1 */ \
ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
} \
Bcond(cNE, 4+4-8); /* same */ \
MOVW(s1, 0); /* either compare matched: s1 is replaced by 0 (NOTE(review): this drops every other bit of the loaded word, unlike the masked BIC used by FCOMI - confirm) */ \
} \
STRH_IMM8(s1, xEmu, offsetof(x86emu_t, sw))

// Generate FCOMI with s1 and s2 scratch regs (the VCMP is already done)
#define FCOMI(s1, s2) \
#define FCOMI(s1, s2, s3, s4, v1, v2, is_f) \
IFX(X_CF|X_PF|X_ZF|X_PEND) { \
VMRS_APSR(); /* 0b111 */ \
BIC_IMM8(xFlags, xFlags, 0b1000101, 0); \
ORR_IMM8_COND(cVS, xFlags, xFlags, 0b01000101, 0); /* unordered */ \
ORR_IMM8_COND(cEQ, xFlags, xFlags, 0b01000000, 0); /* zero */ \
ORR_IMM8_COND(cMI, xFlags, xFlags, 0b00000001, 0); /* less than */ \
/* greater than leave 0 */ \
if(s4) { \
Bcond(cVS, (is_f?11:13)*4-8); \
if(is_f) { \
MOVW(s4, 0); \
MOVT(s4, 0x7ff0); /* +inf */ \
VMOVfrV(s2, v1); \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
Bcond(cEQ, 5*4-8); /* same */ \
VMOVfrV(s2, v2); \
ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
} else { \
MOVW(s4, 0); \
MOVT(s4, 0x7ff0); /* +inf */ \
VMOVfrV_D(s2, s3, v1); \
ORR_REG_LSL_IMM5(s2, s2, s3, 0); \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
Bcond(cEQ, 6*4-8); /* same */ \
VMOVfrV_D(s2, s3, v2); \
ORR_REG_LSL_IMM5(s2, s2, s3, 0); \
ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
CMPS_REG_LSL_IMM5(s2, s4, 0); \
} \
Bcond(cNE, 4+4-8); /* same */ \
BIC_IMM8(xFlags, xFlags, 0b1000101, 0); \
} \
} \
SET_DFNONE(s1); \
IFX(X_OF|X_AF|X_SF|X_PEND) { \
Expand Down

0 comments on commit f1559b0

Please sign in to comment.