jit_state: Split off CPSR.{E,T}

This allows us to improve code emission for PopRSBHint. We also improve
code emission for other terminals at the same time.
This commit is contained in:
MerryMage 2017-12-02 15:24:10 +00:00
parent 3cca3bbd0b
commit 311361b409
5 changed files with 67 additions and 72 deletions

View file

@ -26,10 +26,10 @@ computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.
u64 LocationDescriptor::UniqueHash() const { u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE. // This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc); u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value()) << 32; u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? (1ull << 35) : 0; u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? (1ull << 39) : 0; u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64; return pc_u64 | fpscr_u64 | t_u64 | e_u64;
} }
@ -120,12 +120,10 @@ To check if a prediction is in the RSB, we linearly scan the RSB.
using namespace Xbyak::util; using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB // This calculation has to match up with IREmitter::PushRSB
code->mov(ebx, MJitStateCpsr());
code->mov(ecx, MJitStateReg(Arm::Reg::PC)); code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->and_(ebx, u32((1 << 5) | (1 << 9))); code->shl(rcx, 32);
code->shr(ebx, 2); code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
code->shl(rbx, 32);
code->or_(rbx, rcx); code->or_(rbx, rcx);
code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));

View file

@ -362,12 +362,12 @@ void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
} }
} }
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
using namespace Xbyak::util;
auto args = reg_alloc.GetArgumentInfo(inst); auto args = reg_alloc.GetArgumentInfo(inst);
auto& arg = args[0]; auto& arg = args[0];
const u32 T_bit = 1 << 5;
// Pseudocode: // Pseudocode:
// if (new_pc & 1) { // if (new_pc & 1) {
// new_pc &= 0xFFFFFFFE; // new_pc &= 0xFFFFFFFE;
@ -376,36 +376,41 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
// new_pc &= 0xFFFFFFFC; // new_pc &= 0xFFFFFFFC;
// cpsr.T = false; // cpsr.T = false;
// } // }
// We rely on the fact we disallow EFlag from changing within a block.
if (arg.IsImmediate()) { if (arg.IsImmediate()) {
u32 new_pc = arg.GetImmediateU32(); u32 new_pc = arg.GetImmediateU32();
if (Common::Bit<0>(new_pc)) { u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
new_pc &= 0xFFFFFFFE; u32 et = 0;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc); et |= block.Location().EFlag() ? 2 : 0;
code->or_(MJitStateCpsr_other(), T_bit); et |= Common::Bit<0>(new_pc) ? 1 : 0;
} else {
new_pc &= 0xFFFFFFFC;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
code->and_(MJitStateCpsr_other(), ~T_bit);
}
} else {
using Xbyak::util::ptr;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc & mask);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
} else {
if (block.Location().EFlag()) {
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32(); Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 et = reg_alloc.ScratchGpr().cvt32();
code->mov(tmp1, MJitStateCpsr_other()); code->mov(mask, new_pc);
code->mov(tmp2, tmp1); code->and_(mask, 1);
code->and_(tmp2, u32(~T_bit)); // CPSR.T = 0 code->lea(et, ptr[mask.cvt64() + 2]);
code->or_(tmp1, u32(T_bit)); // CPSR.T = 1 code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
code->test(new_pc, u32(1)); code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->cmove(tmp1, tmp2); // CPSR.T = pc & 1 code->and_(new_pc, mask);
code->mov(MJitStateCpsr_other(), tmp1);
code->lea(tmp2, ptr[new_pc.cvt64() + new_pc.cvt64() * 1]);
code->or_(tmp2, u32(0xFFFFFFFC)); // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, tmp2);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc); code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
} else {
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
code->mov(mask, new_pc);
code->and_(mask, 1);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], mask);
code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, mask);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
}
} }
} }
@ -3371,22 +3376,18 @@ void EmitX64::EmitTerminal(IR::Term::ReturnToDispatch, IR::LocationDescriptor) {
code->ReturnFromRunCode(); code->ReturnFromRunCode();
} }
static u32 CalculateCpsr_et(const IR::LocationDescriptor& desc) {
u32 et = 0;
et |= desc.EFlag() ? 2 : 0;
et |= desc.TFlag() ? 1 : 0;
return et;
}
void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) { void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util; using namespace Xbyak::util;
if (terminal.next.TFlag() != initial_location.TFlag()) { if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
if (terminal.next.TFlag()) { code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
code->or_(MJitStateCpsr_other(), u32(1 << 5));
} else {
code->and_(MJitStateCpsr_other(), u32(~(1 << 5)));
}
}
if (terminal.next.EFlag() != initial_location.EFlag()) {
if (terminal.next.EFlag()) {
code->or_(MJitStateCpsr_other(), u32(1 << 9));
} else {
code->and_(MJitStateCpsr_other(), u32(~(1 << 9)));
}
} }
code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0); code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0);
@ -3412,19 +3413,8 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor
void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) { void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util; using namespace Xbyak::util;
if (terminal.next.TFlag() != initial_location.TFlag()) { if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
if (terminal.next.TFlag()) { code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
code->or_(MJitStateCpsr_other(), u32(1 << 5));
} else {
code->and_(MJitStateCpsr_other(), u32(~(1 << 5)));
}
}
if (terminal.next.EFlag() != initial_location.EFlag()) {
if (terminal.next.EFlag()) {
code->or_(MJitStateCpsr_other(), u32(1 << 9));
} else {
code->and_(MJitStateCpsr_other(), u32(~(1 << 9)));
}
} }
patch_information[terminal.next.UniqueHash()].jmp.emplace_back(code->getCurr()); patch_information[terminal.next.UniqueHash()].jmp.emplace_back(code->getCurr());
@ -3439,12 +3429,11 @@ void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) {
using namespace Xbyak::util; using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB // This calculation has to match up with IREmitter::PushRSB
code->mov(ebx, MJitStateCpsr_other()); // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
code->mov(ecx, MJitStateReg(Arm::Reg::PC)); code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->and_(ebx, u32((1 << 5) | (1 << 9))); code->shl(rcx, 32);
code->shr(ebx, 2); code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]); code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
code->shl(rbx, 32);
code->or_(rbx, rcx); code->or_(rbx, rcx);
code->mov(eax, dword[r15 + offsetof(JitState, rsb_ptr)]); code->mov(eax, dword[r15 + offsetof(JitState, rsb_ptr)]);

View file

@ -52,6 +52,9 @@ u32 JitState::Cpsr() const {
cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0; cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0;
cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0; cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0;
cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0; cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0;
// E flag, T flag
cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0;
cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0;
// Other flags // Other flags
cpsr |= CPSR_other; cpsr |= CPSR_other;
@ -65,8 +68,12 @@ void JitState::SetCpsr(u32 cpsr) {
CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0; CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0; CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0; CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
// E flag, T flag
CPSR_et = 0;
CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0;
CPSR_et |= Common::Bit<5>(cpsr) ? 1 : 0;
// Other flags // Other flags
CPSR_other = cpsr & 0xFFF0FFFF; CPSR_other = cpsr & 0xFFF0FDDF;
} }
void JitState::ResetRSB() { void JitState::ResetRSB() {

View file

@ -30,6 +30,7 @@ struct JitState {
u32 CPSR_other = 0; u32 CPSR_other = 0;
u32 CPSR_ge = 0; u32 CPSR_ge = 0;
u32 CPSR_et = 0;
u32 Cpsr() const; u32 Cpsr() const;
void SetCpsr(u32 cpsr); void SetCpsr(u32 cpsr);

View file

@ -75,10 +75,10 @@ public:
u64 UniqueHash() const { u64 UniqueHash() const {
// This value MUST BE UNIQUE. // This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc); u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value()) << 32; u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? (1ull << 35) : 0; u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? (1ull << 39) : 0; u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64; return pc_u64 | fpscr_u64 | t_u64 | e_u64;
} }