From f73104633b7113742a1cf8835098a6ebb6703db7 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 23 Aug 2018 14:48:17 +0100 Subject: [PATCH] a32_emit_x64: Fix incorrect BMI2 implementation for SetCpsr * The MSB for each byte in cpsr_ge were not being appropriately set. * We also expand test coverage to test this case. * We fix the disassembly of the MSR (imm) and MSR (reg) instructions as well. --- src/backend/x64/a32_emit_x64.cpp | 15 +++++++++------ .../A32/disassembler/disassembler_arm.cpp | 16 ++++++++++------ tests/A32/fuzz_arm.cpp | 6 +++--- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp index 46f7e944..1d041312 100644 --- a/src/backend/x64/a32_emit_x64.cpp +++ b/src/backend/x64/a32_emit_x64.cpp @@ -292,7 +292,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { // extract all of their bits together at once with one pext. static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge)); code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, CPSR_et)]); - code.mov(tmp.cvt64(), 0x8080808000000003ull); + code.mov(tmp.cvt64(), 0x80808080'00000003ull); code.pext(result.cvt64(), result.cvt64(), tmp.cvt64()); code.mov(tmp, 0x000f0220); code.pdep(result, result, tmp); @@ -320,6 +320,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32(); // CPSR_q code.bt(cpsr, 27); @@ -339,12 +340,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { static_assert(offsetof(A32JitState, CPSR_et) + 4 == offsetof(A32JitState, CPSR_ge)); code.mov(tmp, 0x000f0220); code.pext(cpsr, cpsr, tmp); - code.mov(tmp.cvt64(), 0x8080808000000003ull); + code.mov(tmp.cvt64(), 0x01010101'00000003ull); code.pdep(cpsr.cvt64(), cpsr.cvt64(), tmp.cvt64()); - code.mov(tmp.cvt64(), cpsr.cvt64()); - code.shr(tmp.cvt64(), 7); - code.sub(cpsr.cvt64(), tmp.cvt64()); - code.mov(qword[r15 + offsetof(A32JitState, CPSR_et)], cpsr.cvt64()); + // We perform SWAR partitioned subtraction here, to negate the GE bytes. + code.mov(tmp.cvt64(), 0x80808080'00000003ull); + code.mov(tmp2.cvt64(), tmp.cvt64()); + code.sub(tmp.cvt64(), cpsr.cvt64()); + code.xor_(tmp.cvt64(), tmp2.cvt64()); + code.mov(qword[r15 + offsetof(A32JitState, CPSR_et)], tmp.cvt64()); } else { ctx.reg_alloc.HostCall(nullptr, args[0]); code.mov(code.ABI_PARAM2, code.r15); diff --git a/src/frontend/A32/disassembler/disassembler_arm.cpp b/src/frontend/A32/disassembler/disassembler_arm.cpp index 5bf44432..297b1193 100644 --- a/src/frontend/A32/disassembler/disassembler_arm.cpp +++ b/src/frontend/A32/disassembler/disassembler_arm.cpp @@ -878,14 +878,18 @@ public: return fmt::format("mrs{} {}, apsr", CondToString(cond), d); } std::string arm_MSR_imm(Cond cond, int mask, int rotate, Imm8 imm8) { - bool write_nzcvq = Common::Bit<1>(mask); - bool write_g = Common::Bit<0>(mask); - return fmt::format("msr{} apsr_{}{}, #{}", CondToString(cond), write_nzcvq ? "nzcvq" : "", write_g ? "g" : "", ArmExpandImm(rotate, imm8)); + const bool write_c = Common::Bit<0>(mask); + const bool write_x = Common::Bit<1>(mask); + const bool write_s = Common::Bit<2>(mask); + const bool write_f = Common::Bit<3>(mask); + return fmt::format("msr{} cpsr_{}{}{}{}, #{}", CondToString(cond), write_c ? "c" : "", write_x ? "x" : "", write_s ? "s" : "", write_f ? "f" : "", ArmExpandImm(rotate, imm8)); } std::string arm_MSR_reg(Cond cond, int mask, Reg n) { - bool write_nzcvq = Common::Bit<1>(mask); - bool write_g = Common::Bit<0>(mask); - return fmt::format("msr{} apsr_{}{}, {}", CondToString(cond), write_nzcvq ? "nzcvq" : "", write_g ? "g" : "", n); + const bool write_c = Common::Bit<0>(mask); + const bool write_x = Common::Bit<1>(mask); + const bool write_s = Common::Bit<2>(mask); + const bool write_f = Common::Bit<3>(mask); + return fmt::format("msr{} cpsr_{}{}{}{}, {}", CondToString(cond), write_c ? "c" : "", write_x ? "x" : "", write_s ? "s" : "", write_f ? "f" : "", n); } std::string arm_RFE() { return "ice"; } std::string arm_SETEND(bool E) { diff --git a/tests/A32/fuzz_arm.cpp b/tests/A32/fuzz_arm.cpp index 9e10ac3a..e2a073b5 100644 --- a/tests/A32/fuzz_arm.cpp +++ b/tests/A32/fuzz_arm.cpp @@ -1062,7 +1062,7 @@ TEST_CASE("Test ARM misc instructions", "[JitX64][A32]") { TEST_CASE("Test ARM MSR instructions", "[JitX64][A32]") { const auto is_msr_valid = [](u32 instr) -> bool { - return Bits<18, 19>(instr) != 0; + return Bits<16, 19>(instr) != 0; }; const auto is_msr_reg_valid = [&is_msr_valid](u32 instr) -> bool { @@ -1074,8 +1074,8 @@ TEST_CASE("Test ARM MSR instructions", "[JitX64][A32]") { }; const std::array instructions = {{ - InstructionGenerator("cccc00110010mm001111rrrrvvvvvvvv", is_msr_valid), // MSR (imm) - InstructionGenerator("cccc00010010mm00111100000000nnnn", is_msr_reg_valid), // MSR (reg) + InstructionGenerator("cccc00110010mmmm1111rrrrvvvvvvvv", is_msr_valid), // MSR (imm) + InstructionGenerator("cccc00010010mmmm111100000000nnnn", is_msr_reg_valid), // MSR (reg) InstructionGenerator("cccc000100001111dddd000000000000", is_mrs_valid), // MRS }};