a32_emit_x64: Fix incorrect BMI2 implementation for SetCpsr

* The MSB for each byte in cpsr_ge were not being appropriately set.
* We also expand test coverage to test this case.
* We fix the disassembly of the MSR (imm) and MSR (reg) instructions as well.
This commit is contained in:
MerryMage 2018-08-23 14:48:17 +01:00
parent 3432a08e0a
commit f73104633b
3 changed files with 22 additions and 15 deletions

View file

@ -292,7 +292,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
// extract all of their bits together at once with one pext. // extract all of their bits together at once with one pext.
static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge)); static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge));
code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, CPSR_et)]); code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, CPSR_et)]);
code.mov(tmp.cvt64(), 0x8080808000000003ull); code.mov(tmp.cvt64(), 0x80808080'00000003ull);
code.pext(result.cvt64(), result.cvt64(), tmp.cvt64()); code.pext(result.cvt64(), result.cvt64(), tmp.cvt64());
code.mov(tmp, 0x000f0220); code.mov(tmp, 0x000f0220);
code.pdep(result, result, tmp); code.pdep(result, result, tmp);
@ -320,6 +320,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
// CPSR_q // CPSR_q
code.bt(cpsr, 27); code.bt(cpsr, 27);
@ -339,12 +340,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge)); static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge));
code.mov(tmp, 0x000f0220); code.mov(tmp, 0x000f0220);
code.pext(cpsr, cpsr, tmp); code.pext(cpsr, cpsr, tmp);
code.mov(tmp.cvt64(), 0x8080808000000003ull); code.mov(tmp.cvt64(), 0x01010101'00000003ull);
code.pdep(cpsr.cvt64(), cpsr.cvt64(), tmp.cvt64()); code.pdep(cpsr.cvt64(), cpsr.cvt64(), tmp.cvt64());
code.mov(tmp.cvt64(), cpsr.cvt64()); // We perform SWAR partitioned subtraction here, to negate the GE bytes.
code.shr(tmp.cvt64(), 7); code.mov(tmp.cvt64(), 0x80808080'00000003ull);
code.sub(cpsr.cvt64(), tmp.cvt64()); code.mov(tmp2.cvt64(), tmp.cvt64());
code.mov(qword[r15 + offsetof(A32JitState, CPSR_et)], cpsr.cvt64()); code.sub(tmp.cvt64(), cpsr.cvt64());
code.xor_(tmp.cvt64(), tmp2.cvt64());
code.mov(qword[r15 + offsetof(A32JitState, CPSR_et)], tmp.cvt64());
} else { } else {
ctx.reg_alloc.HostCall(nullptr, args[0]); ctx.reg_alloc.HostCall(nullptr, args[0]);
code.mov(code.ABI_PARAM2, code.r15); code.mov(code.ABI_PARAM2, code.r15);

View file

@ -878,14 +878,18 @@ public:
return fmt::format("mrs{} {}, apsr", CondToString(cond), d); return fmt::format("mrs{} {}, apsr", CondToString(cond), d);
} }
std::string arm_MSR_imm(Cond cond, int mask, int rotate, Imm8 imm8) { std::string arm_MSR_imm(Cond cond, int mask, int rotate, Imm8 imm8) {
bool write_nzcvq = Common::Bit<1>(mask); const bool write_c = Common::Bit<0>(mask);
bool write_g = Common::Bit<0>(mask); const bool write_x = Common::Bit<1>(mask);
return fmt::format("msr{} apsr_{}{}, #{}", CondToString(cond), write_nzcvq ? "nzcvq" : "", write_g ? "g" : "", ArmExpandImm(rotate, imm8)); const bool write_s = Common::Bit<2>(mask);
const bool write_f = Common::Bit<3>(mask);
return fmt::format("msr{} cpsr_{}{}{}{}, #{}", CondToString(cond), write_c ? "c" : "", write_x ? "x" : "", write_s ? "s" : "", write_f ? "f" : "", ArmExpandImm(rotate, imm8));
} }
std::string arm_MSR_reg(Cond cond, int mask, Reg n) { std::string arm_MSR_reg(Cond cond, int mask, Reg n) {
bool write_nzcvq = Common::Bit<1>(mask); const bool write_c = Common::Bit<0>(mask);
bool write_g = Common::Bit<0>(mask); const bool write_x = Common::Bit<1>(mask);
return fmt::format("msr{} apsr_{}{}, {}", CondToString(cond), write_nzcvq ? "nzcvq" : "", write_g ? "g" : "", n); const bool write_s = Common::Bit<2>(mask);
const bool write_f = Common::Bit<3>(mask);
return fmt::format("msr{} cpsr_{}{}{}{}, {}", CondToString(cond), write_c ? "c" : "", write_x ? "x" : "", write_s ? "s" : "", write_f ? "f" : "", n);
} }
std::string arm_RFE() { return "ice"; } std::string arm_RFE() { return "ice"; }
std::string arm_SETEND(bool E) { std::string arm_SETEND(bool E) {

View file

@ -1062,7 +1062,7 @@ TEST_CASE("Test ARM misc instructions", "[JitX64][A32]") {
TEST_CASE("Test ARM MSR instructions", "[JitX64][A32]") { TEST_CASE("Test ARM MSR instructions", "[JitX64][A32]") {
const auto is_msr_valid = [](u32 instr) -> bool { const auto is_msr_valid = [](u32 instr) -> bool {
return Bits<18, 19>(instr) != 0; return Bits<16, 19>(instr) != 0;
}; };
const auto is_msr_reg_valid = [&is_msr_valid](u32 instr) -> bool { const auto is_msr_reg_valid = [&is_msr_valid](u32 instr) -> bool {
@ -1074,8 +1074,8 @@ TEST_CASE("Test ARM MSR instructions", "[JitX64][A32]") {
}; };
const std::array<InstructionGenerator, 3> instructions = {{ const std::array<InstructionGenerator, 3> instructions = {{
InstructionGenerator("cccc00110010mm001111rrrrvvvvvvvv", is_msr_valid), // MSR (imm) InstructionGenerator("cccc00110010mmmm1111rrrrvvvvvvvv", is_msr_valid), // MSR (imm)
InstructionGenerator("cccc00010010mm00111100000000nnnn", is_msr_reg_valid), // MSR (reg) InstructionGenerator("cccc00010010mmmm111100000000nnnn", is_msr_reg_valid), // MSR (reg)
InstructionGenerator("cccc000100001111dddd000000000000", is_mrs_valid), // MRS InstructionGenerator("cccc000100001111dddd000000000000", is_mrs_valid), // MRS
}}; }};