From f1559b066c9b3394ccc3325f71892c5a1b3c911f Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Wed, 4 Dec 2024 11:13:17 +0100
Subject: [PATCH] [DYNAREC] Improved x87 comparison (backported from Box64)

---
 src/dynarec/dynarec_arm_0f.c     |  2 +-
 src/dynarec/dynarec_arm_660f.c   |  2 +-
 src/dynarec/dynarec_arm_d8.c     |  8 ++---
 src/dynarec/dynarec_arm_d9.c     |  2 +-
 src/dynarec/dynarec_arm_da.c     |  6 ++--
 src/dynarec/dynarec_arm_db.c     |  4 +--
 src/dynarec/dynarec_arm_dc.c     |  8 ++---
 src/dynarec/dynarec_arm_dd.c     |  4 +--
 src/dynarec/dynarec_arm_de.c     |  4 +--
 src/dynarec/dynarec_arm_df.c     |  4 +--
 src/dynarec/dynarec_arm_helper.h | 56 ++++++++++++++++++++++++++++++--
 11 files changed, 76 insertions(+), 24 deletions(-)

diff --git a/src/dynarec/dynarec_arm_0f.c b/src/dynarec/dynarec_arm_0f.c
index 55cbc9d4e..e86c04f68 100755
--- a/src/dynarec/dynarec_arm_0f.c
+++ b/src/dynarec/dynarec_arm_0f.c
@@ -475,7 +475,7 @@ uintptr_t dynarec0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                 VMOVD(d1, v0);
             }
             VCMP_F32(d1*2, s0);
-            FCOMI(x1, x2);
+            FCOMI(x1, x2, 0, 0, d1*2, s0, 1);
             break;

         case 0x31:
diff --git a/src/dynarec/dynarec_arm_660f.c b/src/dynarec/dynarec_arm_660f.c
index 3a9a88ec6..0302aeeb9 100755
--- a/src/dynarec/dynarec_arm_660f.c
+++ b/src/dynarec/dynarec_arm_660f.c
@@ -282,7 +282,7 @@ uintptr_t dynarec660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nins
             v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
             GETEX(q0, 0);
             VCMP_F64(v0, q0);
-            FCOMI(x1, x2);
+            FCOMI(x1, x2, 0, 0, v0, q0, 0);
             break;

         case 0x38: // SSSE3 opcodes
diff --git a/src/dynarec/dynarec_arm_d8.c b/src/dynarec/dynarec_arm_d8.c
index 33be9d9f9..885f351df 100755
--- a/src/dynarec/dynarec_arm_d8.c
+++ b/src/dynarec/dynarec_arm_d8.c
@@ -100,7 +100,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             break;
         case 0xD8:
         case 0xD9:
@@ -118,7 +118,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xE0:
@@ -275,7 +275,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     VCVT_F64_F32(d1, s0);
                     VCMP_F64(v1, d1);
                 }
-                FCOM(x1, x2);
+                FCOM(x1, x2, x3, x14, v1, ST_IS_F(0)?s0:d1, ST_IS_F(0));
                 break;
             case 3:
                 INST_NAME("FCOMP ST0, float[ED]");
@@ -296,7 +296,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     VCVT_F64_F32(d1, s0);
                     VCMP_F64(v1, d1);
                 }
-                FCOM(x1, x2);
+                FCOM(x1, x2, x3, x14, v1, ST_IS_F(0)?s0:d1, ST_IS_F(0));
                 X87_POP_OR_FAIL(dyn, ninst, x3);
                 break;
             case 4:
diff --git a/src/dynarec/dynarec_arm_d9.c b/src/dynarec/dynarec_arm_d9.c
index 4ac272c85..76c56e2ee 100755
--- a/src/dynarec/dynarec_arm_d9.c
+++ b/src/dynarec/dynarec_arm_d9.c
@@ -126,7 +126,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64_0(v1);
             }
-            FCOM(x1, x2); // same flags...
+            FCOM(x1, x2, 0, 0, v1, 0, ST_IS_F(0)); // same flags...
             break;
         case 0xE5:
             INST_NAME("FXAM");
diff --git a/src/dynarec/dynarec_arm_da.c b/src/dynarec/dynarec_arm_da.c
index 3253d1981..48bc74c1e 100755
--- a/src/dynarec/dynarec_arm_da.c
+++ b/src/dynarec/dynarec_arm_da.c
@@ -128,7 +128,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -185,7 +185,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                 VMOVtoV(s0, ed);
                 VCVT_F64_S32(d0, s0);
                 VCMP_F64(v1, d0);
-                FCOM(x1, x2);
+                FCOM(x1, x2, x3, x14, v1, d0, 0);
                 break;
             case 3:
                 INST_NAME("FICOMP ST0, Ed");
@@ -196,7 +196,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                 VMOVtoV(s0, ed);
                 VCVT_F64_S32(d0, s0);
                 VCMP_F64(v1, d0);
-                FCOM(x1, x2);
+                FCOM(x1, x2, x3, x14, v1, d0, 0);
                 X87_POP_OR_FAIL(dyn, ninst, x3);
                 break;
             case 4:
diff --git a/src/dynarec/dynarec_arm_db.c b/src/dynarec/dynarec_arm_db.c
index cb93bc354..9a63e0a4b 100755
--- a/src/dynarec/dynarec_arm_db.c
+++ b/src/dynarec/dynarec_arm_db.c
@@ -149,7 +149,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOMI(x1, x2);
+            FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             break;
         case 0xF0:
         case 0xF1:
@@ -168,7 +168,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOMI(x1, x2);
+            FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             break;

         case 0xE0:
diff --git a/src/dynarec/dynarec_arm_dc.c b/src/dynarec/dynarec_arm_dc.c
index 13310c264..045b5e305 100755
--- a/src/dynarec/dynarec_arm_dc.c
+++ b/src/dynarec/dynarec_arm_dc.c
@@ -96,7 +96,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             break;
         case 0xD8:
         case 0xD9:
@@ -114,7 +114,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xE0:
@@ -258,7 +258,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     VMOVtoV_D(d1, x2, x3);
                 }
                 VCMP_F64(v1, d1);
-                FCOM(x1, x2);
+                FCOM(x1, x2, x3, x14, v1, d1, 0);
                 break;
             case 3:
                 INST_NAME("FCOMP ST0, double[ED]");
@@ -275,7 +275,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
                     VMOVtoV_D(d1, x2, x3);
                 }
                 VCMP_F64(v1, d1);
-                FCOM(x1, x2);
+                FCOM(x1, x2, x3, x14, v1, d1, 0);
                 X87_POP_OR_FAIL(dyn, ninst, x3);
                 break;
             case 4:
diff --git a/src/dynarec/dynarec_arm_dd.c b/src/dynarec/dynarec_arm_dd.c
index d56fb9853..19d7840d4 100755
--- a/src/dynarec/dynarec_arm_dd.c
+++ b/src/dynarec/dynarec_arm_dd.c
@@ -111,7 +111,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             break;
         case 0xE8:
         case 0xE9:
@@ -129,7 +129,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;

diff --git a/src/dynarec/dynarec_arm_de.c b/src/dynarec/dynarec_arm_de.c
index 4ff403b29..6214fa94e 100755
--- a/src/dynarec/dynarec_arm_de.c
+++ b/src/dynarec/dynarec_arm_de.c
@@ -94,7 +94,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;

@@ -107,7 +107,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOM(x1, x2);
+            FCOM(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
diff --git a/src/dynarec/dynarec_arm_df.c b/src/dynarec/dynarec_arm_df.c
index c36206a5b..95e461301 100755
--- a/src/dynarec/dynarec_arm_df.c
+++ b/src/dynarec/dynarec_arm_df.c
@@ -90,7 +90,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOMI(x1, x2);
+            FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF0:
@@ -111,7 +111,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
             } else {
                 VCMP_F64(v1, v2);
             }
-            FCOMI(x1, x2);
+            FCOMI(x1, x2, x3, x14, v1, v2, ST_IS_F(0));
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;

diff --git a/src/dynarec/dynarec_arm_helper.h b/src/dynarec/dynarec_arm_helper.h
index 50ee99fb6..e2e8d8237 100755
--- a/src/dynarec/dynarec_arm_helper.h
+++ b/src/dynarec/dynarec_arm_helper.h
@@ -316,7 +316,7 @@
 #define IFXN(A, B) if((dyn->insts[ninst].x86.gen_flags&(A) && !(dyn->insts[ninst].x86.gen_flags&(B))))

 // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
-#define FCOM(s1, s2) \
+#define FCOM(s1, s2, s3, s4, v1, v2, is_f) \
     VMRS_APSR();    /* 0b0100011100000000 */ \
     LDRH_IMM8(s1, xEmu, offsetof(x86emu_t, sw));    /*offset is 8bits right?*/ \
     BIC_IMM8(s1, s1, 0b01000111, 12); \
@@ -324,10 +324,36 @@
     ORR_IMM8_COND(cEQ, s1, s1, 0b01000000, 12); /* equal */ \
     ORR_IMM8_COND(cMI, s1, s1, 0b00000001, 12); /* less than */ \
     /* greater than leave 0 */ \
+    if(s4) { \
+        Bcond(cVS, (is_f?11:13)*4-8); \
+        if(is_f) { \
+            MOVW(s4, 0); \
+            MOVT(s4, 0x7ff0); /* +inf */ \
+            VMOVfrV(s2, v1); \
+            CMPS_REG_LSL_IMM5(s2, s4, 0); \
+            Bcond(cEQ, 5*4-8); /* same */ \
+            VMOVfrV(s2, v2); \
+            ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
+            CMPS_REG_LSL_IMM5(s2, s4, 0); \
+        } else { \
+            MOVW(s4, 0); \
+            MOVT(s4, 0x7ff0); /* +inf */ \
+            VMOVfrV_D(s2, s3, v1); \
+            ORR_REG_LSL_IMM5(s2, s2, s3, 0); \
+            CMPS_REG_LSL_IMM5(s2, s4, 0); \
+            Bcond(cEQ, 6*4-8); /* same */ \
+            VMOVfrV_D(s2, s3, v2); \
+            ORR_REG_LSL_IMM5(s2, s2, s3, 0); \
+            ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
+            CMPS_REG_LSL_IMM5(s2, s4, 0); \
+        } \
+        Bcond(cNE, 4+4-8); /* same */ \
+        MOVW(s1, 0); \
+    } \
     STRH_IMM8(s1, xEmu, offsetof(x86emu_t, sw))

 // Generate FCOMI with s1 and s2 scratch regs (the VCMP is already done)
-#define FCOMI(s1, s2) \
+#define FCOMI(s1, s2, s3, s4, v1, v2, is_f) \
     IFX(X_CF|X_PF|X_ZF|X_PEND) { \
         VMRS_APSR();    /* 0b111 */ \
         BIC_IMM8(xFlags, xFlags, 0b1000101, 0); \
@@ -335,6 +361,32 @@
         ORR_IMM8_COND(cEQ, xFlags, xFlags, 0b01000000, 0); /* zero */ \
         ORR_IMM8_COND(cMI, xFlags, xFlags, 0b00000001, 0); /* less than */ \
         /* greater than leave 0 */ \
+        if(s4) { \
+            Bcond(cVS, (is_f?11:13)*4-8); \
+            if(is_f) { \
+                MOVW(s4, 0); \
+                MOVT(s4, 0x7ff0); /* +inf */ \
+                VMOVfrV(s2, v1); \
+                CMPS_REG_LSL_IMM5(s2, s4, 0); \
+                Bcond(cEQ, 5*4-8); /* same */ \
+                VMOVfrV(s2, v2); \
+                ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
+                CMPS_REG_LSL_IMM5(s2, s4, 0); \
+            } else { \
+                MOVW(s4, 0); \
+                MOVT(s4, 0x7ff0); /* +inf */ \
+                VMOVfrV_D(s2, s3, v1); \
+                ORR_REG_LSL_IMM5(s2, s2, s3, 0); \
+                CMPS_REG_LSL_IMM5(s2, s4, 0); \
+                Bcond(cEQ, 6*4-8); /* same */ \
+                VMOVfrV_D(s2, s3, v2); \
+                ORR_REG_LSL_IMM5(s2, s2, s3, 0); \
+                ORR_IMM8(s4, s4, 0b10, 1); /* -inf */ \
+                CMPS_REG_LSL_IMM5(s2, s4, 0); \
+            } \
+            Bcond(cNE, 4+4-8); /* same */ \
+            BIC_IMM8(xFlags, xFlags, 0b1000101, 0); \
+        } \
     } \
     SET_DFNONE(s1); \
     IFX(X_OF|X_AF|X_SF|X_PEND) { \
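
Not part of the patch itself: the FCOM/FCOMI macros now take the two VFP registers that were just compared, two extra scratch registers, and an is_f flag. For ordered results they additionally check whether ST0 holds the +infinity bit pattern or the source holds the -infinity bit pattern, and in that case force the "ST0 greater than source" outcome, presumably to better match 80-bit x87 results for values that overflowed to infinity when narrowed to the emulator's double storage. The sketch below is a rough C model of the double path as I read the emitted ARM sequence (fcom_flags is a hypothetical name used only for illustration); the FCOMI variant maps the same three outcomes onto CF, PF, and ZF in EFLAGS instead of C0, C2, and C3 in the status word.

/* Illustrative sketch, not part of the patch: a C model of the x87
 * condition bits the expanded FCOM macro computes from a VCMP result.
 * Bit positions follow the x87 status word: C0 = bit 8, C2 = bit 10,
 * C3 = bit 14 (the 0x4500 / 0x4000 / 0x0100 immediates in the macro). */
#include <math.h>
#include <stdint.h>
#include <string.h>

#define X87_C0 (1u << 8)
#define X87_C2 (1u << 10)
#define X87_C3 (1u << 14)

/* st0 and src stand for the doubles the dynarec just compared with VCMP_F64. */
uint16_t fcom_flags(double st0, double src)
{
    /* Unordered (a NaN is involved): C3=C2=C0=1; the macro skips the new
     * infinity check in this case via the Bcond(cVS, ...) branch. */
    if (isnan(st0) || isnan(src))
        return X87_C3 | X87_C2 | X87_C0;

    uint16_t sw = 0;
    if (st0 < src)
        sw = X87_C0;            /* less than */
    else if (st0 == src)
        sw = X87_C3;            /* equal */
    /* greater than: all three bits stay 0 */

    /* Special case added by the patch: if ST0 carries the +inf bit pattern,
     * or the source carries the -inf bit pattern, report "ST0 greater". */
    uint64_t a, b;
    memcpy(&a, &st0, sizeof(a));
    memcpy(&b, &src, sizeof(b));
    if (a == 0x7ff0000000000000ull || b == 0xfff0000000000000ull)
        sw = 0;                 /* ST0 > src */

    return sw;
}

With this model, fcom_flags(INFINITY, INFINITY) returns 0 ("greater") rather than C3 ("equal"), which appears to be the corner case the new code changes relative to the plain VCMP mapping.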