From 5c6fcf378fda4b8ff1fa57fce2c4c1369356cb91 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Tue, 12 Dec 2017 14:19:48 +0000
Subject: [PATCH] emit_x64: Optimize code emitted by EmitGetCpsr

---
 src/backend_x64/emit_x64.cpp | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp
index dc111a23..b8324e44 100644
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -196,9 +196,35 @@ static u32 GetCpsrImpl(JitState* jit_state) {
 }
 void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    reg_alloc.HostCall(inst);
-    code->mov(code->ABI_PARAM1, code->r15);
-    code->CallFunction(&GetCpsrImpl);
+    if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { // fast path: the pext/pdep emitted below are BMI2 instructions
+        Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32(); // accumulates the assembled 32-bit value
+        Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32(); // holds the pext/pdep bit masks
+        Xbyak::Reg32 c = reg_alloc.ScratchGpr().cvt32(); // temporary for the CPSR_ge and CPSR_et fields
+        code->mov(c, dword[r15 + offsetof(JitState, CPSR_ge)]); // 32-bit load; upper half of the 64-bit register is zeroed
+        // Here we observe that CPSR_q and CPSR_nzcv are right next to each other in memory,
+        // so we load them both at the same time with one 64-bit read. This allows us to
+        // extract all of their bits together at once with one pext.
+        code->mov(result.cvt64(), qword[r15 + offsetof(JitState, CPSR_q)]);
+        code->mov(b.cvt64(), 0xF000000000000001ull); // mask: bit 0 plus bits 63..60 (presumably Q and N,Z,C,V -- confirm against JitState layout)
+        code->pext(result.cvt64(), result.cvt64(), b.cvt64()); // pack the five selected bits into result[4:0]
+        code->mov(b, 0x80808080); // mask: bit 7 of each of the four bytes
+        code->pext(c.cvt64(), c.cvt64(), b.cvt64()); // pack those four bits into c[3:0]
+        code->shl(result, 27); // result[4:0] -> bits 31..27
+        code->shl(c, 16); // c[3:0] -> bits 19..16
+        code->or_(result, c);
+        code->mov(b, 0x00000220); // mask: bits 5 and 9
+        code->mov(c, dword[r15 + offsetof(JitState, CPSR_et)]);
+        code->pdep(c.cvt64(), c.cvt64(), b.cvt64()); // scatter c[1:0] to bits 5 and 9
+        code->or_(result, dword[r15 + offsetof(JitState, CPSR_jaifm)]); // OR'd in as stored, no bit shuffling
+        code->or_(result, c);
+        reg_alloc.DefineValue(inst, result); // the assembled value becomes the result of this IR instruction
+    } else {
+        reg_alloc.HostCall(inst); // no BMI2: fall back to calling the C++ helper, as before this change
+        code->mov(code->ABI_PARAM1, code->r15); // r15 is passed as the helper's only argument
+        code->CallFunction(&GetCpsrImpl);
+    }
 }
 static void SetCpsrImpl(u32 value, JitState* jit_state) {