diff --git a/src/backend_x64/a32_emit_x64.cpp b/src/backend_x64/a32_emit_x64.cpp index 9df22d48..5017f78f 100644 --- a/src/backend_x64/a32_emit_x64.cpp +++ b/src/backend_x64/a32_emit_x64.cpp @@ -70,18 +70,18 @@ bool A32EmitContext::FPSCR_DN() const { return Location().FPSCR().DN(); } -A32EmitX64::A32EmitX64(BlockOfCode* code, A32::UserConfig config, A32::Jit* jit_interface) +A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface) : EmitX64(code), config(config), jit_interface(jit_interface) { GenMemoryAccessors(); - code->PreludeComplete(); + code.PreludeComplete(); } A32EmitX64::~A32EmitX64() = default; A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { - code->align(); - const u8* const entrypoint = code->getCurr(); + code.align(); + const u8* const entrypoint = code.getCurr(); // Start emitting. EmitCondPrelude(block); @@ -121,12 +121,12 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { EmitAddCycles(block.CycleCount()); EmitX64::EmitTerminal(block.GetTerminal(), block.Location()); - code->int3(); + code.int3(); const A32::LocationDescriptor descriptor{block.Location()}; Patch(descriptor, entrypoint); - const size_t size = static_cast(code->getCurr() - entrypoint); + const size_t size = static_cast(code.getCurr() - entrypoint); const A32::LocationDescriptor end_location{block.EndLocation()}; const auto range = boost::icl::discrete_interval::closed(descriptor.PC(), end_location.PC() - 1); A32EmitX64::BlockDescriptor block_desc{entrypoint, size}; @@ -146,68 +146,68 @@ void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set& rang } void A32EmitX64::GenMemoryAccessors() { - code->align(); - read_memory_8 = code->getCurr(); + code.align(); + read_memory_8 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryRead8).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); - code->align(); - read_memory_16 = code->getCurr(); + code.align(); + read_memory_16 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryRead16).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); - code->align(); - read_memory_32 = code->getCurr(); + code.align(); + read_memory_32 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryRead32).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); - code->align(); - read_memory_64 = code->getCurr(); + code.align(); + read_memory_64 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryRead64).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); - code->align(); - write_memory_8 = code->getCurr(); + code.align(); + write_memory_8 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryWrite8).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); - code->align(); - write_memory_16 = code->getCurr(); + code.align(); + write_memory_16 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryWrite16).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); - code->align(); - write_memory_32 = 
code->getCurr(); + code.align(); + write_memory_32 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryWrite32).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); - code->align(); - write_memory_64 = code->getCurr(); + code.align(); + write_memory_64 = code.getCurr(); ABI_PushCallerSaveRegistersAndAdjustStack(code); DEVIRT(config.callbacks, &A32::UserCallbacks::MemoryWrite64).EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); - code->ret(); + code.ret(); } void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) { A32::Reg reg = inst->GetArg(0).GetA32RegRef(); Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, MJitStateReg(reg)); + code.mov(result, MJitStateReg(reg)); ctx.reg_alloc.DefineValue(inst, result); } @@ -216,7 +216,7 @@ void A32EmitX64::EmitA32GetExtendedRegister32(A32EmitContext& ctx, IR::Inst* ins ASSERT(A32::IsSingleExtReg(reg)); Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->movss(result, MJitStateExtReg(reg)); + code.movss(result, MJitStateExtReg(reg)); ctx.reg_alloc.DefineValue(inst, result); } @@ -225,7 +225,7 @@ void A32EmitX64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins ASSERT(A32::IsDoubleExtReg(reg)); Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->movsd(result, MJitStateExtReg(reg)); + code.movsd(result, MJitStateExtReg(reg)); ctx.reg_alloc.DefineValue(inst, result); } @@ -233,13 +233,13 @@ void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); A32::Reg reg = inst->GetArg(0).GetA32RegRef(); if (args[1].IsImmediate()) { - code->mov(MJitStateReg(reg), args[1].GetImmediateU32()); + code.mov(MJitStateReg(reg), args[1].GetImmediateU32()); } else if (args[1].IsInXmm()) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); - code->movd(MJitStateReg(reg), to_store); + code.movd(MJitStateReg(reg), to_store); } else { Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - code->mov(MJitStateReg(reg), to_store); + code.mov(MJitStateReg(reg), to_store); } } @@ -249,10 +249,10 @@ void A32EmitX64::EmitA32SetExtendedRegister32(A32EmitContext& ctx, IR::Inst* ins ASSERT(A32::IsSingleExtReg(reg)); if (args[1].IsInXmm()) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); - code->movss(MJitStateExtReg(reg), to_store); + code.movss(MJitStateExtReg(reg), to_store); } else { Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - code->mov(MJitStateExtReg(reg), to_store); + code.mov(MJitStateExtReg(reg), to_store); } } @@ -262,10 +262,10 @@ void A32EmitX64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins ASSERT(A32::IsDoubleExtReg(reg)); if (args[1].IsInXmm()) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); - code->movsd(MJitStateExtReg(reg), to_store); + code.movsd(MJitStateExtReg(reg), to_store); } else { Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]); - code->mov(MJitStateExtReg(reg), to_store); + code.mov(MJitStateExtReg(reg), to_store); } } @@ -274,34 +274,34 @@ static u32 GetCpsrImpl(A32JitState* jit_state) { } void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { - if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 c = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(c, 
dword[r15 + offsetof(A32JitState, CPSR_ge)]); + code.mov(c, dword[r15 + offsetof(A32JitState, CPSR_ge)]); // Here we observe that CPSR_q and CPSR_nzcv are right next to each other in memory, // so we load them both at the same time with one 64-bit read. This allows us to // extract all of their bits together at once with one pext. - code->mov(result.cvt64(), qword[r15 + offsetof(A32JitState, CPSR_q)]); - code->mov(b.cvt64(), 0xF000000000000001ull); - code->pext(result.cvt64(), result.cvt64(), b.cvt64()); - code->mov(b, 0x80808080); - code->pext(c.cvt64(), c.cvt64(), b.cvt64()); - code->shl(result, 27); - code->shl(c, 16); - code->or_(result, c); - code->mov(b, 0x00000220); - code->mov(c, dword[r15 + offsetof(A32JitState, CPSR_et)]); - code->pdep(c.cvt64(), c.cvt64(), b.cvt64()); - code->or_(result, dword[r15 + offsetof(A32JitState, CPSR_jaifm)]); - code->or_(result, c); + code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, CPSR_q)]); + code.mov(b.cvt64(), 0xF000000000000001ull); + code.pext(result.cvt64(), result.cvt64(), b.cvt64()); + code.mov(b, 0x80808080); + code.pext(c.cvt64(), c.cvt64(), b.cvt64()); + code.shl(result, 27); + code.shl(c, 16); + code.or_(result, c); + code.mov(b, 0x00000220); + code.mov(c, dword[r15 + offsetof(A32JitState, CPSR_et)]); + code.pdep(c.cvt64(), c.cvt64(), b.cvt64()); + code.or_(result, dword[r15 + offsetof(A32JitState, CPSR_jaifm)]); + code.or_(result, c); ctx.reg_alloc.DefineValue(inst, result); } else { ctx.reg_alloc.HostCall(inst); - code->mov(code->ABI_PARAM1, code->r15); - code->CallFunction(&GetCpsrImpl); + code.mov(code.ABI_PARAM1, code.r15); + code.CallFunction(&GetCpsrImpl); } } @@ -312,8 +312,8 @@ static void SetCpsrImpl(u32 value, A32JitState* jit_state) { void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, args[0]); - code->mov(code->ABI_PARAM2, code->r15); - code->CallFunction(&SetCpsrImpl); + code.mov(code.ABI_PARAM2, code.r15); + code.CallFunction(&SetCpsrImpl); } void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) { @@ -321,12 +321,12 @@ void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsImmediate()) { u32 imm = args[0].GetImmediateU32(); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); } else { Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->and_(a, 0xF0000000); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], a); + code.and_(a, 0xF0000000); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], a); } } @@ -335,22 +335,22 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsImmediate()) { u32 imm = args[0].GetImmediateU32(); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); - code->mov(code->byte[r15 + offsetof(A32JitState, CPSR_q)], u8((imm & 0x08000000) != 0 ? 1 : 0)); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], u32(imm & 0xF0000000)); + code.mov(code.byte[r15 + offsetof(A32JitState, CPSR_q)], u8((imm & 0x08000000) != 0 ? 
1 : 0)); } else { Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->bt(a, 27); - code->setc(code->byte[r15 + offsetof(A32JitState, CPSR_q)]); - code->and_(a, 0xF0000000); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], a); + code.bt(a, 27); + code.setc(code.byte[r15 + offsetof(A32JitState, CPSR_q)]); + code.and_(a, 0xF0000000); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], a); } } void A32EmitX64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); - code->shr(result, 31); + code.mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); + code.shr(result, 31); ctx.reg_alloc.DefineValue(inst, result); } @@ -360,24 +360,24 @@ void A32EmitX64::EmitA32SetNFlag(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->shl(to_store, flag_bit); - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); + code.shl(to_store, flag_bit); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); } } void A32EmitX64::EmitA32GetZFlag(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); - code->shr(result, 30); - code->and_(result, 1); + code.mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); + code.shr(result, 30); + code.and_(result, 1); ctx.reg_alloc.DefineValue(inst, result); } @@ -387,24 +387,24 @@ void A32EmitX64::EmitA32SetZFlag(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->shl(to_store, flag_bit); - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); + code.shl(to_store, flag_bit); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); } } void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); - code->shr(result, 29); - code->and_(result, 1); + code.mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); + code.shr(result, 29); + code.and_(result, 1); ctx.reg_alloc.DefineValue(inst, result); } @@ -414,24 +414,24 @@ void A32EmitX64::EmitA32SetCFlag(A32EmitContext& ctx, IR::Inst* inst) 
{ auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->shl(to_store, flag_bit); - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); + code.shl(to_store, flag_bit); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); } } void A32EmitX64::EmitA32GetVFlag(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); - code->shr(result, 28); - code->and_(result, 1); + code.mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]); + code.shr(result, 28); + code.and_(result, 1); ctx.reg_alloc.DefineValue(inst, result); } @@ -441,16 +441,16 @@ void A32EmitX64::EmitA32SetVFlag(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) { - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask); } else { - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); } } else { Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->shl(to_store, flag_bit); - code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); - code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); + code.shl(to_store, flag_bit); + code.and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask); + code.or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store); } } @@ -458,17 +458,17 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { if (args[0].GetImmediateU1()) - code->mov(dword[r15 + offsetof(A32JitState, CPSR_q)], 1); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_q)], 1); } else { Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); - code->or_(code->byte[r15 + offsetof(A32JitState, CPSR_q)], to_store); + code.or_(code.byte[r15 + offsetof(A32JitState, CPSR_q)], to_store); } } void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->movd(result, dword[r15 + offsetof(A32JitState, CPSR_ge)]); + code.movd(result, dword[r15 + offsetof(A32JitState, CPSR_ge)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -478,10 +478,10 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { if (args[0].IsInXmm()) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); - code->movd(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store); + code.movd(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store); } else { Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store); } } @@ -495,25 +495,25 
@@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst ge |= Common::Bit<17>(imm) ? 0x0000FF00 : 0; ge |= Common::Bit<16>(imm) ? 0x000000FF : 0; - code->mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], ge); - } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { + code.mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], ge); + } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) { Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(b, 0x01010101); - code->shr(a, 16); - code->pdep(a, a, b); - code->imul(a, a, 0xFF); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], a); + code.mov(b, 0x01010101); + code.shr(a, 16); + code.pdep(a, a, b); + code.imul(a, a, 0xFF); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], a); } else { Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->shr(a, 16); - code->and_(a, 0xF); - code->imul(a, a, 0x00204081); - code->and_(a, 0x01010101); - code->imul(a, a, 0xFF); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], a); + code.shr(a, 16); + code.and_(a, 0xF); + code.imul(a, a, 0x00204081); + code.and_(a, 0x01010101); + code.imul(a, a, 0xFF); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], a); } } @@ -538,31 +538,31 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { et |= ctx.Location().EFlag() ? 2 : 0; et |= Common::Bit<0>(new_pc) ? 1 : 0; - code->mov(MJitStateReg(A32::Reg::PC), new_pc & mask); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_et)], et); + code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_et)], et); } else { if (ctx.Location().EFlag()) { Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32(); Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg32 et = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(mask, new_pc); - code->and_(mask, 1); - code->lea(et, ptr[mask.cvt64() + 2]); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_et)], et); - code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC - code->and_(new_pc, mask); - code->mov(MJitStateReg(A32::Reg::PC), new_pc); + code.mov(mask, new_pc); + code.and_(mask, 1); + code.lea(et, ptr[mask.cvt64() + 2]); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_et)], et); + code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC + code.and_(new_pc, mask); + code.mov(MJitStateReg(A32::Reg::PC), new_pc); } else { Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32(); Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(mask, new_pc); - code->and_(mask, 1); - code->mov(dword[r15 + offsetof(A32JitState, CPSR_et)], mask); - code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC - code->and_(new_pc, mask); - code->mov(MJitStateReg(A32::Reg::PC), new_pc); + code.mov(mask, new_pc); + code.and_(mask, 1); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_et)], mask); + code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 
0xFFFFFFFE : 0xFFFFFFFC + code.and_(new_pc, mask); + code.mov(MJitStateReg(A32::Reg::PC), new_pc); } } } @@ -570,18 +570,18 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(nullptr); - code->SwitchMxcsrOnExit(); - code->mov(code->ABI_PARAM2, qword[r15 + offsetof(A32JitState, cycles_to_run)]); - code->sub(code->ABI_PARAM2, qword[r15 + offsetof(A32JitState, cycles_remaining)]); + code.SwitchMxcsrOnExit(); + code.mov(code.ABI_PARAM2, qword[r15 + offsetof(A32JitState, cycles_to_run)]); + code.sub(code.ABI_PARAM2, qword[r15 + offsetof(A32JitState, cycles_remaining)]); DEVIRT(config.callbacks, &A32::UserCallbacks::AddTicks).EmitCall(code); ctx.reg_alloc.EndOfAllocScope(); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); DEVIRT(config.callbacks, &A32::UserCallbacks::CallSVC).EmitCall(code); DEVIRT(config.callbacks, &A32::UserCallbacks::GetTicksRemaining).EmitCall(code); - code->mov(qword[r15 + offsetof(A32JitState, cycles_to_run)], code->ABI_RETURN); - code->mov(qword[r15 + offsetof(A32JitState, cycles_remaining)], code->ABI_RETURN); - code->SwitchMxcsrOnEntry(); + code.mov(qword[r15 + offsetof(A32JitState, cycles_to_run)], code.ABI_RETURN); + code.mov(qword[r15 + offsetof(A32JitState, cycles_remaining)], code.ABI_RETURN); + code.SwitchMxcsrOnEntry(); } void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) { @@ -591,8 +591,8 @@ void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) { u32 pc = args[0].GetImmediateU32(); u64 exception = args[1].GetImmediateU64(); DEVIRT(config.callbacks, &A32::UserCallbacks::ExceptionRaised).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) { - code->mov(param1, pc); - code->mov(param2, exception); + code.mov(param1, pc); + code.mov(param2, exception); }); } @@ -602,10 +602,10 @@ static u32 GetFpscrImpl(A32JitState* jit_state) { void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.HostCall(inst); - code->mov(code->ABI_PARAM1, code->r15); + code.mov(code.ABI_PARAM1, code.r15); - code->stmxcsr(code->dword[code->r15 + offsetof(A32JitState, guest_MXCSR)]); - code->CallFunction(&GetFpscrImpl); + code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); + code.CallFunction(&GetFpscrImpl); } static void SetFpscrImpl(u32 value, A32JitState* jit_state) { @@ -615,15 +615,15 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) { void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, args[0]); - code->mov(code->ABI_PARAM2, code->r15); + code.mov(code.ABI_PARAM2, code.r15); - code->CallFunction(&SetFpscrImpl); - code->ldmxcsr(code->dword[code->r15 + offsetof(A32JitState, guest_MXCSR)]); + code.CallFunction(&SetFpscrImpl); + code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]); } void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, dword[r15 + offsetof(A32JitState, FPSCR_nzcv)]); + code.mov(result, dword[r15 + offsetof(A32JitState, FPSCR_nzcv)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -631,11 +631,11 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 value = 
ctx.reg_alloc.UseGpr(args[0]).cvt32(); - code->mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value); + code.mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value); } void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) { - code->mov(code->byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); + code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); } void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) { @@ -643,12 +643,12 @@ void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); Xbyak::Reg32 address = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - code->mov(code->byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1)); - code->mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address); + code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1)); + code.mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address); } template -static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) { +static void ReadMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) { constexpr size_t bit_size = Common::BitSize(); auto args = reg_alloc.GetArgumentInfo(inst); @@ -661,47 +661,47 @@ static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, c reg_alloc.UseScratch(args[0], ABI_PARAM2); Xbyak::Reg64 result = reg_alloc.ScratchGpr({ABI_RETURN}); - Xbyak::Reg32 vaddr = code->ABI_PARAM2.cvt32(); + Xbyak::Reg32 vaddr = code.ABI_PARAM2.cvt32(); Xbyak::Reg64 page_index = reg_alloc.ScratchGpr(); Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr(); Xbyak::Label abort, end; - code->mov(result, reinterpret_cast(config.page_table)); - code->mov(page_index.cvt32(), vaddr); - code->shr(page_index.cvt32(), 12); - code->mov(result, qword[result + page_index * 8]); - code->test(result, result); - code->jz(abort); - code->mov(page_offset.cvt32(), vaddr); - code->and_(page_offset.cvt32(), 4095); + code.mov(result, reinterpret_cast(config.page_table)); + code.mov(page_index.cvt32(), vaddr); + code.shr(page_index.cvt32(), 12); + code.mov(result, qword[result + page_index * 8]); + code.test(result, result); + code.jz(abort); + code.mov(page_offset.cvt32(), vaddr); + code.and_(page_offset.cvt32(), 4095); switch (bit_size) { case 8: - code->movzx(result, code->byte[result + page_offset]); + code.movzx(result, code.byte[result + page_offset]); break; case 16: - code->movzx(result, word[result + page_offset]); + code.movzx(result, word[result + page_offset]); break; case 32: - code->mov(result.cvt32(), dword[result + page_offset]); + code.mov(result.cvt32(), dword[result + page_offset]); break; case 64: - code->mov(result.cvt64(), qword[result + page_offset]); + code.mov(result.cvt64(), qword[result + page_offset]); break; default: ASSERT_MSG(false, "Invalid bit_size"); break; } - code->jmp(end); - code->L(abort); - code->call(wrapped_fn); - code->L(end); + code.jmp(end); + code.L(abort); + code.call(wrapped_fn); + code.L(end); reg_alloc.DefineValue(inst, result); } template -static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) { +static void WriteMemory(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, const CodePtr wrapped_fn) { constexpr size_t bit_size = Common::BitSize(); auto args = 
reg_alloc.GetArgumentInfo(inst); @@ -715,42 +715,42 @@ static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, reg_alloc.UseScratch(args[0], ABI_PARAM2); reg_alloc.UseScratch(args[1], ABI_PARAM3); - Xbyak::Reg32 vaddr = code->ABI_PARAM2.cvt32(); - Xbyak::Reg64 value = code->ABI_PARAM3; + Xbyak::Reg32 vaddr = code.ABI_PARAM2.cvt32(); + Xbyak::Reg64 value = code.ABI_PARAM3; Xbyak::Reg64 page_index = reg_alloc.ScratchGpr(); Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr(); Xbyak::Label abort, end; - code->mov(rax, reinterpret_cast(config.page_table)); - code->mov(page_index.cvt32(), vaddr); - code->shr(page_index.cvt32(), 12); - code->mov(rax, qword[rax + page_index * 8]); - code->test(rax, rax); - code->jz(abort); - code->mov(page_offset.cvt32(), vaddr); - code->and_(page_offset.cvt32(), 4095); + code.mov(rax, reinterpret_cast(config.page_table)); + code.mov(page_index.cvt32(), vaddr); + code.shr(page_index.cvt32(), 12); + code.mov(rax, qword[rax + page_index * 8]); + code.test(rax, rax); + code.jz(abort); + code.mov(page_offset.cvt32(), vaddr); + code.and_(page_offset.cvt32(), 4095); switch (bit_size) { case 8: - code->mov(code->byte[rax + page_offset], value.cvt8()); + code.mov(code.byte[rax + page_offset], value.cvt8()); break; case 16: - code->mov(word[rax + page_offset], value.cvt16()); + code.mov(word[rax + page_offset], value.cvt16()); break; case 32: - code->mov(dword[rax + page_offset], value.cvt32()); + code.mov(dword[rax + page_offset], value.cvt32()); break; case 64: - code->mov(qword[rax + page_offset], value.cvt64()); + code.mov(qword[rax + page_offset], value.cvt64()); break; default: ASSERT_MSG(false, "Invalid bit_size"); break; } - code->jmp(end); - code->L(abort); - code->call(wrapped_fn); - code->L(end); + code.jmp(end); + code.L(abort); + code.call(wrapped_fn); + code.L(end); } void A32EmitX64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { @@ -786,7 +786,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { } template -static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, bool prepend_high_word) { +static void ExclusiveWrite(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* inst, const A32::UserConfig& config, bool prepend_high_word) { auto args = reg_alloc.GetArgumentInfo(inst); if (prepend_high_word) { reg_alloc.HostCall(nullptr, {}, args[0], args[1], args[2]); @@ -794,26 +794,26 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins reg_alloc.HostCall(nullptr, {}, args[0], args[1]); } Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32(); - Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers. + Xbyak::Reg32 tmp = code.ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers. 
Xbyak::Label end; - code->mov(passed, u32(1)); - code->cmp(code->byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); - code->je(end); - code->mov(tmp, code->ABI_PARAM2); - code->xor_(tmp, dword[r15 + offsetof(A32JitState, exclusive_address)]); - code->test(tmp, A32JitState::RESERVATION_GRANULE_MASK); - code->jne(end); - code->mov(code->byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); + code.mov(passed, u32(1)); + code.cmp(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); + code.je(end); + code.mov(tmp, code.ABI_PARAM2); + code.xor_(tmp, dword[r15 + offsetof(A32JitState, exclusive_address)]); + code.test(tmp, A32JitState::RESERVATION_GRANULE_MASK); + code.jne(end); + code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); if (prepend_high_word) { - code->mov(code->ABI_PARAM3.cvt32(), code->ABI_PARAM3.cvt32()); // zero extend to 64-bits - code->shl(code->ABI_PARAM4, 32); - code->or_(code->ABI_PARAM3, code->ABI_PARAM4); + code.mov(code.ABI_PARAM3.cvt32(), code.ABI_PARAM3.cvt32()); // zero extend to 64-bits + code.shl(code.ABI_PARAM4, 32); + code.or_(code.ABI_PARAM3, code.ABI_PARAM4); } DEVIRT(config.callbacks, fn).EmitCall(code); - code->xor_(passed, passed); - code->L(end); + code.xor_(passed, passed); + code.L(end); reg_alloc.DefineValue(inst, passed); } @@ -838,15 +838,15 @@ static void EmitCoprocessorException() { ASSERT_MSG(false, "Should raise coproc exception here"); } -static void CallCoprocCallback(BlockOfCode* code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, boost::optional arg0 = {}, boost::optional arg1 = {}) { +static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Jit* jit_interface, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, boost::optional arg0 = {}, boost::optional arg1 = {}) { reg_alloc.HostCall(inst, {}, {}, arg0, arg1); - code->mov(code->ABI_PARAM1, reinterpret_cast(jit_interface)); + code.mov(code.ABI_PARAM1, reinterpret_cast(jit_interface)); if (callback.user_arg) { - code->mov(code->ABI_PARAM2, reinterpret_cast(*callback.user_arg)); + code.mov(code.ABI_PARAM2, reinterpret_cast(*callback.user_arg)); } - code->CallFunction(callback.function); + code.CallFunction(callback.function); } void A32EmitX64::EmitA32CoprocInternalOperation(A32EmitContext& ctx, IR::Inst* inst) { @@ -906,8 +906,8 @@ void A32EmitX64::EmitA32CoprocSendOneWord(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(); - code->mov(reg_destination_addr, reinterpret_cast(destination_ptr)); - code->mov(code->dword[reg_destination_addr], reg_word); + code.mov(reg_destination_addr, reinterpret_cast(destination_ptr)); + code.mov(code.dword[reg_destination_addr], reg_word); return; } @@ -946,10 +946,10 @@ void A32EmitX64::EmitA32CoprocSendTwoWords(A32EmitContext& ctx, IR::Inst* inst) Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(args[2]).cvt32(); Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(); - code->mov(reg_destination_addr, reinterpret_cast(destination_ptrs[0])); - code->mov(code->dword[reg_destination_addr], reg_word1); - code->mov(reg_destination_addr, reinterpret_cast(destination_ptrs[1])); - code->mov(code->dword[reg_destination_addr], reg_word2); + code.mov(reg_destination_addr, reinterpret_cast(destination_ptrs[0])); + code.mov(code.dword[reg_destination_addr], reg_word1); + code.mov(reg_destination_addr, 
reinterpret_cast(destination_ptrs[1])); + code.mov(code.dword[reg_destination_addr], reg_word2); return; } @@ -988,8 +988,8 @@ void A32EmitX64::EmitA32CoprocGetOneWord(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr().cvt32(); Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr(); - code->mov(reg_source_addr, reinterpret_cast(source_ptr)); - code->mov(reg_word, code->dword[reg_source_addr]); + code.mov(reg_source_addr, reinterpret_cast(source_ptr)); + code.mov(reg_word, code.dword[reg_source_addr]); ctx.reg_alloc.DefineValue(inst, reg_word); @@ -1029,12 +1029,12 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(); Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr(); - code->mov(reg_destination_addr, reinterpret_cast(source_ptrs[1])); - code->mov(reg_result.cvt32(), code->dword[reg_destination_addr]); - code->shl(reg_result, 32); - code->mov(reg_destination_addr, reinterpret_cast(source_ptrs[0])); - code->mov(reg_tmp.cvt32(), code->dword[reg_destination_addr]); - code->or_(reg_result, reg_tmp); + code.mov(reg_destination_addr, reinterpret_cast(source_ptrs[1])); + code.mov(reg_result.cvt32(), code.dword[reg_destination_addr]); + code.shl(reg_result, 32); + code.mov(reg_destination_addr, reinterpret_cast(source_ptrs[0])); + code.mov(reg_tmp.cvt32(), code.dword[reg_destination_addr]); + code.or_(reg_result, reg_tmp); ctx.reg_alloc.DefineValue(inst, reg_result); @@ -1102,16 +1102,16 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented"); ASSERT_MSG(terminal.num_instructions == 1, "Unimplemented"); - code->mov(code->ABI_PARAM2.cvt32(), A32::LocationDescriptor{terminal.next}.PC()); - code->mov(code->ABI_PARAM3.cvt32(), 1); - code->mov(MJitStateReg(A32::Reg::PC), code->ABI_PARAM2.cvt32()); - code->SwitchMxcsrOnExit(); + code.mov(code.ABI_PARAM2.cvt32(), A32::LocationDescriptor{terminal.next}.PC()); + code.mov(code.ABI_PARAM3.cvt32(), 1); + code.mov(MJitStateReg(A32::Reg::PC), code.ABI_PARAM2.cvt32()); + code.SwitchMxcsrOnExit(); DEVIRT(config.callbacks, &A32::UserCallbacks::InterpreterFallback).EmitCall(code); - code->ReturnFromRunCode(true); // TODO: Check cycles + code.ReturnFromRunCode(true); // TODO: Check cycles } void A32EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor) { - code->ReturnFromRunCode(); + code.ReturnFromRunCode(); } static u32 CalculateCpsr_et(const IR::LocationDescriptor& arg) { @@ -1124,35 +1124,35 @@ static u32 CalculateCpsr_et(const IR::LocationDescriptor& arg) { void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) { if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { - code->mov(dword[r15 + offsetof(A32JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); } - code->cmp(qword[r15 + offsetof(A32JitState, cycles_remaining)], 0); + code.cmp(qword[r15 + offsetof(A32JitState, cycles_remaining)], 0); - patch_information[terminal.next].jg.emplace_back(code->getCurr()); + patch_information[terminal.next].jg.emplace_back(code.getCurr()); if (auto next_bb = GetBasicBlock(terminal.next)) { EmitPatchJg(terminal.next, next_bb->entrypoint); } else { EmitPatchJg(terminal.next); } Xbyak::Label dest; - 
code->jmp(dest, Xbyak::CodeGenerator::T_NEAR); + code.jmp(dest, Xbyak::CodeGenerator::T_NEAR); - code->SwitchToFarCode(); - code->align(16); - code->L(dest); - code->mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC()); + code.SwitchToFarCode(); + code.align(16); + code.L(dest); + code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC()); PushRSBHelper(rax, rbx, terminal.next); - code->ForceReturnFromRunCode(); - code->SwitchToNearCode(); + code.ForceReturnFromRunCode(); + code.SwitchToNearCode(); } void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) { if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) { - code->mov(dword[r15 + offsetof(A32JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); + code.mov(dword[r15 + offsetof(A32JitState, CPSR_et)], CalculateCpsr_et(terminal.next)); } - patch_information[terminal.next].jmp.emplace_back(code->getCurr()); + patch_information[terminal.next].jmp.emplace_back(code.getCurr()); if (auto next_bb = GetBasicBlock(terminal.next)) { EmitPatchJmp(terminal.next, next_bb->entrypoint); } else { @@ -1163,26 +1163,26 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location void A32EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) { // This calculation has to match up with IREmitter::PushRSB // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et. - code->mov(ecx, MJitStateReg(A32::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[r15 + offsetof(A32JitState, FPSCR_mode)]); - code->or_(ebx, dword[r15 + offsetof(A32JitState, CPSR_et)]); - code->or_(rbx, rcx); + code.mov(ecx, MJitStateReg(A32::Reg::PC)); + code.shl(rcx, 32); + code.mov(ebx, dword[r15 + offsetof(A32JitState, FPSCR_mode)]); + code.or_(ebx, dword[r15 + offsetof(A32JitState, CPSR_et)]); + code.or_(rbx, rcx); - code->mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]); - code->sub(eax, 1); - code->and_(eax, u32(A32JitState::RSBPtrMask)); - code->mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax); - code->cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]); - code->jne(code->GetReturnFromRunCodeAddress()); - code->mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]); - code->jmp(rax); + code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]); + code.sub(eax, 1); + code.and_(eax, u32(A32JitState::RSBPtrMask)); + code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax); + code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]); + code.jne(code.GetReturnFromRunCodeAddress()); + code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]); + code.jmp(rax); } void A32EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) { Xbyak::Label pass = EmitCond(terminal.if_); EmitTerminal(terminal.else_, initial_location); - code->L(pass); + code.L(pass); EmitTerminal(terminal.then_, initial_location); } @@ -1191,40 +1191,40 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit, IR::LocationDescriptor) { } void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) { - code->cmp(code->byte[r15 + offsetof(A32JitState, halt_requested)], u8(0)); - code->jne(code->GetForceReturnFromRunCodeAddress()); + code.cmp(code.byte[r15 + offsetof(A32JitState, halt_requested)], u8(0)); + 
code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location); } void A32EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { - const CodePtr patch_location = code->getCurr(); + const CodePtr patch_location = code.getCurr(); if (target_code_ptr) { - code->jg(target_code_ptr); + code.jg(target_code_ptr); } else { - code->mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC()); - code->jg(code->GetReturnFromRunCodeAddress()); + code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC()); + code.jg(code.GetReturnFromRunCodeAddress()); } - code->EnsurePatchLocationSize(patch_location, 14); + code.EnsurePatchLocationSize(patch_location, 14); } void A32EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { - const CodePtr patch_location = code->getCurr(); + const CodePtr patch_location = code.getCurr(); if (target_code_ptr) { - code->jmp(target_code_ptr); + code.jmp(target_code_ptr); } else { - code->mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC()); - code->jmp(code->GetReturnFromRunCodeAddress()); + code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC()); + code.jmp(code.GetReturnFromRunCodeAddress()); } - code->EnsurePatchLocationSize(patch_location, 13); + code.EnsurePatchLocationSize(patch_location, 13); } void A32EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { if (!target_code_ptr) { - target_code_ptr = code->GetReturnFromRunCodeAddress(); + target_code_ptr = code.GetReturnFromRunCodeAddress(); } - const CodePtr patch_location = code->getCurr(); - code->mov(code->rcx, reinterpret_cast(target_code_ptr)); - code->EnsurePatchLocationSize(patch_location, 10); + const CodePtr patch_location = code.getCurr(); + code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.EnsurePatchLocationSize(patch_location, 10); } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/a32_emit_x64.h b/src/backend_x64/a32_emit_x64.h index 14c28716..5237eec7 100644 --- a/src/backend_x64/a32_emit_x64.h +++ b/src/backend_x64/a32_emit_x64.h @@ -30,7 +30,7 @@ struct A32EmitContext final : public EmitContext { class A32EmitX64 final : public EmitX64 { public: - A32EmitX64(BlockOfCode* code, A32::UserConfig config, A32::Jit* jit_interface); + A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface); ~A32EmitX64() override; /** diff --git a/src/backend_x64/a32_interface.cpp b/src/backend_x64/a32_interface.cpp index 73f7b36e..5458489c 100644 --- a/src/backend_x64/a32_interface.cpp +++ b/src/backend_x64/a32_interface.cpp @@ -46,7 +46,7 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo struct Jit::Impl { Impl(Jit* jit, A32::UserConfig config) : block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state}) - , emitter(&block_of_code, config, jit) + , emitter(block_of_code, config, jit) , config(config) , jit_interface(jit) {} diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index 3e8a0449..8f31a7e1 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -52,17 +52,17 @@ bool A64EmitContext::FPSCR_DN() const { return Location().FPCR().DN(); } -A64EmitX64::A64EmitX64(BlockOfCode* code, A64::UserConfig conf) +A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf) : EmitX64(code), conf(conf) { - code->PreludeComplete(); + code.PreludeComplete(); } 
A64EmitX64::~A64EmitX64() = default; A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) { - code->align(); - const u8* const entrypoint = code->getCurr(); + code.align(); + const u8* const entrypoint = code.getCurr(); // Start emitting. EmitCondPrelude(block); @@ -102,12 +102,12 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) { EmitAddCycles(block.CycleCount()); EmitX64::EmitTerminal(block.GetTerminal(), block.Location()); - code->int3(); + code.int3(); const A64::LocationDescriptor descriptor{block.Location()}; Patch(descriptor, entrypoint); - const size_t size = static_cast(code->getCurr() - entrypoint); + const size_t size = static_cast(code.getCurr() - entrypoint); const A64::LocationDescriptor end_location{block.EndLocation()}; const auto range = boost::icl::discrete_interval::closed(descriptor.PC(), end_location.PC() - 1); A64EmitX64::BlockDescriptor block_desc{entrypoint, size}; @@ -129,32 +129,32 @@ void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set& rang void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); - code->mov(code->byte[r15 + offsetof(A64JitState, check_bit)], to_store); + code.mov(code.byte[r15 + offsetof(A64JitState, check_bit)], to_store); } void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, dword[r15 + offsetof(A64JitState, CPSR_nzcv)]); - code->shr(result, 29); - code->and_(result, 1); + code.mov(result, dword[r15 + offsetof(A64JitState, CPSR_nzcv)]); + code.shr(result, 29); + code.and_(result, 1); ctx.reg_alloc.DefineValue(inst, result); } void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->and_(to_store, 0b11000001'00000001); - code->imul(to_store, to_store, 0b00010000'00100001); - code->shl(to_store, 16); - code->and_(to_store, 0xF0000000); - code->mov(dword[r15 + offsetof(A64JitState, CPSR_nzcv)], to_store); + code.and_(to_store, 0b11000001'00000001); + code.imul(to_store, to_store, 0b00010000'00100001); + code.shl(to_store, 16); + code.and_(to_store, 0xF0000000); + code.mov(dword[r15 + offsetof(A64JitState, CPSR_nzcv)], to_store); } void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) { A64::Reg reg = inst->GetArg(0).GetA64RegRef(); Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); + code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -162,7 +162,7 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) { A64::Reg reg = inst->GetArg(0).GetA64RegRef(); Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - code->mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); + code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -171,7 +171,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) { auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->movd(result, addr); + code.movd(result, addr); 
ctx.reg_alloc.DefineValue(inst, result); } @@ -180,7 +180,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) { auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->movq(result, addr); + code.movq(result, addr); ctx.reg_alloc.DefineValue(inst, result); } @@ -189,13 +189,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) { auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->movaps(result, addr); + code.movaps(result, addr); ctx.reg_alloc.DefineValue(inst, result); } void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - code->mov(result, qword[r15 + offsetof(A64JitState, sp)]); + code.mov(result, qword[r15 + offsetof(A64JitState, sp)]); ctx.reg_alloc.DefineValue(inst, result); } @@ -204,12 +204,12 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) { A64::Reg reg = inst->GetArg(0).GetA64RegRef(); auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]; if (args[1].FitsInImmediateS32()) { - code->mov(addr, args[1].GetImmediateS32()); + code.mov(addr, args[1].GetImmediateS32()); } else { // TODO: zext tracking, xmm variant Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(args[1]); - code->mov(to_store.cvt32(), to_store.cvt32()); - code->mov(addr, to_store); + code.mov(to_store.cvt32(), to_store.cvt32()); + code.mov(addr, to_store); } } @@ -218,13 +218,13 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) { A64::Reg reg = inst->GetArg(0).GetA64RegRef(); auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]; if (args[1].FitsInImmediateS32()) { - code->mov(addr, args[1].GetImmediateS32()); + code.mov(addr, args[1].GetImmediateS32()); } else if (args[1].IsInXmm()) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); - code->movq(addr, to_store); + code.movq(addr, to_store); } else { Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]); - code->mov(addr, to_store); + code.mov(addr, to_store); } } @@ -236,9 +236,9 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); // TODO: Optimize - code->pxor(tmp, tmp); - code->movss(tmp, to_store); - code->movaps(addr, tmp); + code.pxor(tmp, tmp); + code.movss(tmp, to_store); + code.movaps(addr, tmp); } void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { @@ -247,8 +247,8 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]); - code->movq(to_store, to_store); // TODO: Remove when able - code->movaps(addr, to_store); + code.movq(to_store, to_store); // TODO: Remove when able + code.movaps(addr, to_store); } void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) { @@ -257,20 +257,20 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) { auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); - code->movaps(addr, to_store); + code.movaps(addr, to_store); } void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); 
auto addr = qword[r15 + offsetof(A64JitState, sp)]; if (args[0].FitsInImmediateS32()) { - code->mov(addr, args[0].GetImmediateS32()); + code.mov(addr, args[0].GetImmediateS32()); } else if (args[0].IsInXmm()) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); - code->movq(addr, to_store); + code.movq(addr, to_store); } else { Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]); - code->mov(addr, to_store); + code.mov(addr, to_store); } } @@ -278,13 +278,13 @@ void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto addr = qword[r15 + offsetof(A64JitState, pc)]; if (args[0].FitsInImmediateS32()) { - code->mov(addr, args[0].GetImmediateS32()); + code.mov(addr, args[0].GetImmediateS32()); } else if (args[0].IsInXmm()) { Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); - code->movq(addr, to_store); + code.movq(addr, to_store); } else { Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]); - code->mov(addr, to_store); + code.mov(addr, to_store); } } @@ -294,7 +294,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) { ASSERT(args[0].IsImmediate()); u32 imm = args[0].GetImmediateU32(); DEVIRT(conf.callbacks, &A64::UserCallbacks::CallSVC).EmitCall(code, [&](Xbyak::Reg64 param1) { - code->mov(param1.cvt32(), imm); + code.mov(param1.cvt32(), imm); }); } @@ -305,14 +305,14 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) { u64 pc = args[0].GetImmediateU64(); u64 exception = args[1].GetImmediateU64(); DEVIRT(conf.callbacks, &A64::UserCallbacks::ExceptionRaised).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) { - code->mov(param1, pc); - code->mov(param2, exception); + code.mov(param1, pc); + code.mov(param2, exception); }); } void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8).EmitCall(code, [&](Xbyak::Reg64 vaddr) { - ASSERT(vaddr == code->ABI_PARAM2); + ASSERT(vaddr == code.ABI_PARAM2); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, {}, args[0]); }); @@ -320,7 +320,7 @@ void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16).EmitCall(code, [&](Xbyak::Reg64 vaddr) { - ASSERT(vaddr == code->ABI_PARAM2); + ASSERT(vaddr == code.ABI_PARAM2); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, {}, args[0]); }); @@ -328,7 +328,7 @@ void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32).EmitCall(code, [&](Xbyak::Reg64 vaddr) { - ASSERT(vaddr == code->ABI_PARAM2); + ASSERT(vaddr == code.ABI_PARAM2); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, {}, args[0]); }); @@ -336,7 +336,7 @@ void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64).EmitCall(code, [&](Xbyak::Reg64 vaddr) { - ASSERT(vaddr == code->ABI_PARAM2); + ASSERT(vaddr == code.ABI_PARAM2); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(inst, {}, args[0]); }); @@ -348,33 +348,33 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { 
static_assert(ABI_SHADOW_SPACE >= 16); ctx.reg_alloc.HostCall(nullptr, {}, {}, args[0]); - code->lea(code->ABI_PARAM2, ptr[rsp]); - code->sub(rsp, ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM2, ptr[rsp]); + code.sub(rsp, ABI_SHADOW_SPACE); DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 return_value, Xbyak::Reg64 vaddr) { - ASSERT(return_value == code->ABI_PARAM2 && vaddr == code->ABI_PARAM3); + ASSERT(return_value == code.ABI_PARAM2 && vaddr == code.ABI_PARAM3); }); Xbyak::Xmm result = xmm0; - code->movups(result, xword[code->ABI_RETURN]); - code->add(rsp, ABI_SHADOW_SPACE); + code.movups(result, xword[code.ABI_RETURN]); + code.add(rsp, ABI_SHADOW_SPACE); ctx.reg_alloc.DefineValue(inst, result); #else DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 vaddr) { - ASSERT(vaddr == code->ABI_PARAM2); + ASSERT(vaddr == code.ABI_PARAM2); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0]); }); Xbyak::Xmm result = xmm0; - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { - code->movq(result, code->ABI_RETURN); - code->pinsrq(result, code->ABI_RETURN2, 1); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + code.movq(result, code.ABI_RETURN); + code.pinsrq(result, code.ABI_RETURN2, 1); } else { Xbyak::Xmm tmp = xmm1; - code->movq(result, code->ABI_RETURN); - code->movq(tmp, code->ABI_RETURN2); - code->punpcklqdq(result, tmp); + code.movq(result, code.ABI_RETURN); + code.movq(tmp, code.ABI_RETURN2); + code.punpcklqdq(result, tmp); } ctx.reg_alloc.DefineValue(inst, result); #endif @@ -382,7 +382,7 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) { - ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3); + ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); }); @@ -390,7 +390,7 @@ void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) { - ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3); + ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); }); @@ -398,7 +398,7 @@ void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) { - ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3); + ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); }); @@ -406,7 +406,7 @@ void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) { - ASSERT(vaddr == code->ABI_PARAM2 
&& value == code->ABI_PARAM3); + ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); }); @@ -421,31 +421,31 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]); ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); - code->lea(code->ABI_PARAM3, ptr[rsp]); - code->sub(rsp, ABI_SHADOW_SPACE); - code->movaps(xword[code->ABI_PARAM3], xmm_value); + code.lea(code.ABI_PARAM3, ptr[rsp]); + code.sub(rsp, ABI_SHADOW_SPACE); + code.movaps(xword[code.ABI_PARAM3], xmm_value); DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value_ptr) { - ASSERT(vaddr == code->ABI_PARAM2 && value_ptr == code->ABI_PARAM3); + ASSERT(vaddr == code.ABI_PARAM2 && value_ptr == code.ABI_PARAM3); }); - code->add(rsp, ABI_SHADOW_SPACE); + code.add(rsp, ABI_SHADOW_SPACE); #else DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value0, Xbyak::Reg64 value1) { - ASSERT(vaddr == code->ABI_PARAM2 && value0 == code->ABI_PARAM3 && value1 == code->ABI_PARAM4); + ASSERT(vaddr == code.ABI_PARAM2 && value0 == code.ABI_PARAM3 && value1 == code.ABI_PARAM4); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ctx.reg_alloc.Use(args[0], ABI_PARAM2); ctx.reg_alloc.ScratchGpr({ABI_PARAM3}); ctx.reg_alloc.ScratchGpr({ABI_PARAM4}); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]); - code->movq(code->ABI_PARAM3, xmm_value); - code->pextrq(code->ABI_PARAM4, xmm_value, 1); + code.movq(code.ABI_PARAM3, xmm_value); + code.pextrq(code.ABI_PARAM4, xmm_value, 1); } else { Xbyak::Xmm xmm_value = ctx.reg_alloc.UseScratchXmm(args[1]); - code->movq(code->ABI_PARAM3, xmm_value); - code->punpckhqdq(xmm_value, xmm_value); - code->movq(code->ABI_PARAM4, xmm_value); + code.movq(code.ABI_PARAM3, xmm_value); + code.punpckhqdq(xmm_value, xmm_value); + code.movq(code.ABI_PARAM4, xmm_value); } ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.HostCall(nullptr); @@ -454,35 +454,35 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) { - code->SwitchMxcsrOnExit(); + code.SwitchMxcsrOnExit(); DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) { - code->mov(param1, A64::LocationDescriptor{terminal.next}.PC()); - code->mov(qword[r15 + offsetof(A64JitState, pc)], param1); - code->mov(param2.cvt32(), terminal.num_instructions); + code.mov(param1, A64::LocationDescriptor{terminal.next}.PC()); + code.mov(qword[r15 + offsetof(A64JitState, pc)], param1); + code.mov(param2.cvt32(), terminal.num_instructions); }); - code->ReturnFromRunCode(true); // TODO: Check cycles + code.ReturnFromRunCode(true); // TODO: Check cycles } void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor) { - code->ReturnFromRunCode(); + code.ReturnFromRunCode(); } void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor) { - code->cmp(qword[r15 + offsetof(A64JitState, cycles_remaining)], 0); + code.cmp(qword[r15 + offsetof(A64JitState, cycles_remaining)], 0); - patch_information[terminal.next].jg.emplace_back(code->getCurr()); 
+ patch_information[terminal.next].jg.emplace_back(code.getCurr()); if (auto next_bb = GetBasicBlock(terminal.next)) { EmitPatchJg(terminal.next, next_bb->entrypoint); } else { EmitPatchJg(terminal.next); } - code->mov(rax, A64::LocationDescriptor{terminal.next}.PC()); - code->mov(qword[r15 + offsetof(A64JitState, pc)], rax); - code->ForceReturnFromRunCode(); + code.mov(rax, A64::LocationDescriptor{terminal.next}.PC()); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.ForceReturnFromRunCode(); } void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor) { - patch_information[terminal.next].jmp.emplace_back(code->getCurr()); + patch_information[terminal.next].jmp.emplace_back(code.getCurr()); if (auto next_bb = GetBasicBlock(terminal.next)) { EmitPatchJmp(terminal.next, next_bb->entrypoint); } else { @@ -493,20 +493,20 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) { // This calculation has to match up with A64::LocationDescriptor::UniqueHash // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et. - code->mov(rcx, qword[r15 + offsetof(A64JitState, pc)]); - code->mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]); - code->and_(ebx, A64::LocationDescriptor::FPCR_MASK); - code->shl(ebx, 37); - code->or_(rbx, rcx); + code.mov(rcx, qword[r15 + offsetof(A64JitState, pc)]); + code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]); + code.and_(ebx, A64::LocationDescriptor::FPCR_MASK); + code.shl(ebx, 37); + code.or_(rbx, rcx); - code->mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]); - code->sub(eax, 1); - code->and_(eax, u32(A64JitState::RSBPtrMask)); - code->mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax); - code->cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]); - code->jne(code->GetReturnFromRunCodeAddress()); - code->mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]); - code->jmp(rax); + code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]); + code.sub(eax, 1); + code.and_(eax, u32(A64JitState::RSBPtrMask)); + code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax); + code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]); + code.jne(code.GetReturnFromRunCodeAddress()); + code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]); + code.jmp(rax); } void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) { @@ -518,7 +518,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor default: Xbyak::Label pass = EmitCond(terminal.if_); EmitTerminal(terminal.else_, initial_location); - code->L(pass); + code.L(pass); EmitTerminal(terminal.then_, initial_location); break; } @@ -526,50 +526,50 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) { Xbyak::Label fail; - code->cmp(code->byte[r15 + offsetof(A64JitState, check_bit)], u8(0)); - code->jz(fail); + code.cmp(code.byte[r15 + offsetof(A64JitState, check_bit)], u8(0)); + code.jz(fail); EmitTerminal(terminal.then_, initial_location); - code->L(fail); + code.L(fail); EmitTerminal(terminal.else_, initial_location); } void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor 
initial_location) { - code->cmp(code->byte[r15 + offsetof(A64JitState, halt_requested)], u8(0)); - code->jne(code->GetForceReturnFromRunCodeAddress()); + code.cmp(code.byte[r15 + offsetof(A64JitState, halt_requested)], u8(0)); + code.jne(code.GetForceReturnFromRunCodeAddress()); EmitTerminal(terminal.else_, initial_location); } void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { - const CodePtr patch_location = code->getCurr(); + const CodePtr patch_location = code.getCurr(); if (target_code_ptr) { - code->jg(target_code_ptr); + code.jg(target_code_ptr); } else { - code->mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code->mov(qword[r15 + offsetof(A64JitState, pc)], rax); - code->jg(code->GetReturnFromRunCodeAddress()); + code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.jg(code.GetReturnFromRunCodeAddress()); } - code->EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size + code.EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size } void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { - const CodePtr patch_location = code->getCurr(); + const CodePtr patch_location = code.getCurr(); if (target_code_ptr) { - code->jmp(target_code_ptr); + code.jmp(target_code_ptr); } else { - code->mov(rax, A64::LocationDescriptor{target_desc}.PC()); - code->mov(qword[r15 + offsetof(A64JitState, pc)], rax); - code->jmp(code->GetReturnFromRunCodeAddress()); + code.mov(rax, A64::LocationDescriptor{target_desc}.PC()); + code.mov(qword[r15 + offsetof(A64JitState, pc)], rax); + code.jmp(code.GetReturnFromRunCodeAddress()); } - code->EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size + code.EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size } void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { if (!target_code_ptr) { - target_code_ptr = code->GetReturnFromRunCodeAddress(); + target_code_ptr = code.GetReturnFromRunCodeAddress(); } - const CodePtr patch_location = code->getCurr(); - code->mov(code->rcx, reinterpret_cast(target_code_ptr)); - code->EnsurePatchLocationSize(patch_location, 10); + const CodePtr patch_location = code.getCurr(); + code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.EnsurePatchLocationSize(patch_location, 10); } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h index 768f0c2e..7094d412 100644 --- a/src/backend_x64/a64_emit_x64.h +++ b/src/backend_x64/a64_emit_x64.h @@ -27,7 +27,7 @@ struct A64EmitContext final : public EmitContext { class A64EmitX64 final : public EmitX64 { public: - A64EmitX64(BlockOfCode* code, A64::UserConfig conf); + A64EmitX64(BlockOfCode& code, A64::UserConfig conf); ~A64EmitX64() override; /** diff --git a/src/backend_x64/a64_interface.cpp b/src/backend_x64/a64_interface.cpp index fbace6e9..cb9bba4d 100644 --- a/src/backend_x64/a64_interface.cpp +++ b/src/backend_x64/a64_interface.cpp @@ -39,7 +39,7 @@ public: explicit Impl(UserConfig conf) : conf(conf) , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}) - , emitter(&block_of_code, conf) + , emitter(block_of_code, conf) {} ~Impl() = default; diff --git a/src/backend_x64/abi.cpp b/src/backend_x64/abi.cpp index 332cabf6..a3889afd 100644 --- a/src/backend_x64/abi.cpp +++ b/src/backend_x64/abi.cpp @@ -55,7 +55,7 @@ static FrameInfo CalculateFrameInfo(size_t 
num_gprs, size_t num_xmms, size_t fra } template <typename RegisterArrayT> -void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size, const RegisterArrayT& regs) { +void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) { using namespace Xbyak::util; const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR); @@ -65,25 +65,25 @@ for (HostLoc gpr : regs) { if (HostLocIsGPR(gpr)) { - code->push(HostLocToReg64(gpr)); + code.push(HostLocToReg64(gpr)); } } if (frame_info.stack_subtraction != 0) { - code->sub(rsp, u32(frame_info.stack_subtraction)); + code.sub(rsp, u32(frame_info.stack_subtraction)); } size_t xmm_offset = frame_info.xmm_offset; for (HostLoc xmm : regs) { if (HostLocIsXMM(xmm)) { - code->movaps(code->xword[rsp + xmm_offset], HostLocToXmm(xmm)); + code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm)); xmm_offset += XMM_SIZE; } } } template <typename RegisterArrayT> -void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size, const RegisterArrayT& regs) { +void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) { using namespace Xbyak::util; const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR); @@ -94,35 +94,35 @@ size_t xmm_offset = frame_info.xmm_offset; for (HostLoc xmm : regs) { if (HostLocIsXMM(xmm)) { - code->movaps(HostLocToXmm(xmm), code->xword[rsp + xmm_offset]); + code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]); xmm_offset += XMM_SIZE; } } if (frame_info.stack_subtraction != 0) { - code->add(rsp, u32(frame_info.stack_subtraction)); + code.add(rsp, u32(frame_info.stack_subtraction)); } for (HostLoc gpr : Common::Reverse(regs)) { if (HostLocIsGPR(gpr)) { - code->pop(HostLocToReg64(gpr)); + code.pop(HostLocToReg64(gpr)); } } } -void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) { +void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) { ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE); } -void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) { +void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) { ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE); } -void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) { +void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) { ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE); } -void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) { +void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) { ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE); } diff --git a/src/backend_x64/abi.h b/src/backend_x64/abi.h index 073a6219..b097efcb 100644 --- a/src/backend_x64/abi.h +++ b/src/backend_x64/abi.h @@ -109,9 +109,9 @@ constexpr size_t ABI_SHADOW_SPACE = 0; // bytes static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 30, "Invalid total number of registers"); -void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0); -void
ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0); -void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0); -void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0); +void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0); +void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0); +void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0); +void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0); } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/block_of_code.cpp b/src/backend_x64/block_of_code.cpp index e2d7a354..b8e9bfa0 100644 --- a/src/backend_x64/block_of_code.cpp +++ b/src/backend_x64/block_of_code.cpp @@ -38,10 +38,10 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi) : Xbyak::CodeGenerator(TOTAL_CODE_SIZE) , cb(std::move(cb)) , jsi(jsi) - , constant_pool(this, 256) + , constant_pool(*this, 256) { GenRunCode(); - exception_handler.Register(this); + exception_handler.Register(*this); } void BlockOfCode::PreludeComplete() { @@ -107,12 +107,12 @@ void BlockOfCode::GenRunCode() { align(); run_code_from = getCurr(); - ABI_PushCalleeSaveRegistersAndAdjustStack(this); + ABI_PushCalleeSaveRegistersAndAdjustStack(*this); mov(r15, ABI_PARAM1); mov(r14, ABI_PARAM2); // save temporarily in non-volatile register - cb.GetTicksRemaining->EmitCall(this); + cb.GetTicksRemaining->EmitCall(*this); mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN); mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN); @@ -126,18 +126,18 @@ void BlockOfCode::GenRunCode() { // 1. It saves all the registers we as a callee need to save. // 2. It aligns the stack so that the code the JIT emits can assume // that the stack is appropriately aligned for CALLs. 
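For context on the alignment remark above: both x64 calling conventions require rsp to be 16-byte aligned at every CALL, and the CALL into this prologue has already pushed an 8-byte return address, so the pushes plus any explicit sub must add up to a multiple of 16 before the JIT-emitted code may CALL anything. A minimal illustration of that arithmetic (a hypothetical helper for this note only, not part of this patch or of CalculateFrameInfo):

#include <cstddef>

// Extra bytes to subtract from rsp after `num_gpr_pushes` 8-byte pushes so that rsp
// is 16-byte aligned again; the caller's CALL already pushed an 8-byte return address.
constexpr std::size_t AlignmentPadding(std::size_t num_gpr_pushes) {
    return ((num_gpr_pushes + 1) % 2) * 8;
}

static_assert(AlignmentPadding(9) == 0, "9 pushes + return address = 80 bytes, already aligned");
static_assert(AlignmentPadding(8) == 8, "8 pushes + return address = 72 bytes, needs 8 more");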
- ABI_PushCalleeSaveRegistersAndAdjustStack(this); + ABI_PushCalleeSaveRegistersAndAdjustStack(*this); mov(r15, ABI_PARAM1); - cb.GetTicksRemaining->EmitCall(this); + cb.GetTicksRemaining->EmitCall(*this); mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN); mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN); L(enter_mxcsr_then_loop); SwitchMxcsrOnEntry(); L(loop); - cb.LookupBlock->EmitCall(this); + cb.LookupBlock->EmitCall(*this); jmp(ABI_RETURN); @@ -152,12 +152,12 @@ void BlockOfCode::GenRunCode() { SwitchMxcsrOnExit(); } - cb.AddTicks->EmitCall(this, [this](Xbyak::Reg64 param1) { + cb.AddTicks->EmitCall(*this, [this](Xbyak::Reg64 param1) { mov(param1, qword[r15 + jsi.offsetof_cycles_to_run]); sub(param1, qword[r15 + jsi.offsetof_cycles_remaining]); }); - ABI_PopCalleeSaveRegistersAndAdjustStack(this); + ABI_PopCalleeSaveRegistersAndAdjustStack(*this); ret(); }; diff --git a/src/backend_x64/block_of_code.h b/src/backend_x64/block_of_code.h index e839bac5..7b61a1f1 100644 --- a/src/backend_x64/block_of_code.h +++ b/src/backend_x64/block_of_code.h @@ -138,7 +138,7 @@ private: ExceptionHandler(); ~ExceptionHandler(); - void Register(BlockOfCode* code); + void Register(BlockOfCode& code); private: struct Impl; std::unique_ptr<Impl> impl; diff --git a/src/backend_x64/callback.cpp b/src/backend_x64/callback.cpp index d1836b81..0455fa8f 100644 --- a/src/backend_x64/callback.cpp +++ b/src/backend_x64/callback.cpp @@ -9,48 +9,48 @@ namespace Dynarmic::BackendX64 { -void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void()> l) { +void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void()> l) { l(); - code->CallFunction(fn); + code.CallFunction(fn); } -void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) { - l(code->ABI_PARAM1); - code->CallFunction(fn); +void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) { + l(code.ABI_PARAM1); + code.CallFunction(fn); } -void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) { - l(code->ABI_PARAM1, code->ABI_PARAM2); - code->CallFunction(fn); +void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) { + l(code.ABI_PARAM1, code.ABI_PARAM2); + code.CallFunction(fn); } -void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) { - l(code->ABI_PARAM1, code->ABI_PARAM2, code->ABI_PARAM3); - code->CallFunction(fn); +void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) { + l(code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3); + code.CallFunction(fn); } -void ArgCallback::EmitCall(BlockOfCode* code, std::function<void()> l) { +void ArgCallback::EmitCall(BlockOfCode& code, std::function<void()> l) { l(); - code->mov(code->ABI_PARAM1, arg); - code->CallFunction(fn); + code.mov(code.ABI_PARAM1, arg); + code.CallFunction(fn); } -void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) { - l(code->ABI_PARAM2); - code->mov(code->ABI_PARAM1, arg); - code->CallFunction(fn); +void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) { + l(code.ABI_PARAM2); + code.mov(code.ABI_PARAM1, arg); + code.CallFunction(fn); } -void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) { - l(code->ABI_PARAM2, code->ABI_PARAM3); - code->mov(code->ABI_PARAM1, arg); - code->CallFunction(fn); +void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) { + l(code.ABI_PARAM2, code.ABI_PARAM3); + code.mov(code.ABI_PARAM1, arg); + code.CallFunction(fn); } -void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) { - l(code->ABI_PARAM2, code->ABI_PARAM3, code->ABI_PARAM4); - code->mov(code->ABI_PARAM1,
arg); - code->CallFunction(fn); +void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) { + l(code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4); + code.mov(code.ABI_PARAM1, arg); + code.CallFunction(fn); } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/callback.h b/src/backend_x64/callback.h index 4e38d372..a2084045 100644 --- a/src/backend_x64/callback.h +++ b/src/backend_x64/callback.h @@ -20,10 +20,10 @@ class Callback { public: virtual ~Callback() = default; - virtual void EmitCall(BlockOfCode* code, std::function<void()> fn = []{}) = 0; - virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> fn) = 0; - virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> fn) = 0; - virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> fn) = 0; + virtual void EmitCall(BlockOfCode& code, std::function<void()> fn = []{}) = 0; + virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> fn) = 0; + virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> fn) = 0; + virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> fn) = 0; }; class SimpleCallback final : public Callback { @@ -33,10 +33,10 @@ public: ~SimpleCallback() override = default; - void EmitCall(BlockOfCode* code, std::function<void()> l = []{}) override; - void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) override; - void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override; - void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override; + void EmitCall(BlockOfCode& code, std::function<void()> l = []{}) override; + void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) override; + void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override; + void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override; private: void (*fn)(); @@ -49,10 +49,10 @@ public: ~ArgCallback() override = default; - void EmitCall(BlockOfCode* code, std::function<void()> l = []{}) override; - void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) override; - void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override; - void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override; + void EmitCall(BlockOfCode& code, std::function<void()> l = []{}) override; + void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) override; + void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override; + void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override; private: void (*fn)(); diff --git a/src/backend_x64/constant_pool.cpp b/src/backend_x64/constant_pool.cpp index 32df8cb2..f2e57d0b 100644 --- a/src/backend_x64/constant_pool.cpp +++ b/src/backend_x64/constant_pool.cpp @@ -12,10 +12,10 @@ namespace Dynarmic::BackendX64 { -ConstantPool::ConstantPool(BlockOfCode* code, size_t size) : code(code), pool_size(size) { - code->int3(); - code->align(align_size); - pool_begin = reinterpret_cast<u8*>(code->AllocateFromCodeSpace(size)); +ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) { + code.int3(); + code.align(align_size); + pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size)); std::memset(pool_begin, 0, size); current_pool_ptr = pool_begin; } @@ -28,7 +28,7 @@ Xbyak::Address ConstantPool::GetConstant(u64 constant) { iter = constant_info.emplace(constant, current_pool_ptr).first; current_pool_ptr += align_size; } - return code->xword[code->rip + iter->second]; + return code.xword[code.rip + iter->second]; } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/constant_pool.h b/src/backend_x64/constant_pool.h index 27bd012b..aa749304 100644 --- a/src/backend_x64/constant_pool.h +++ b/src/backend_x64/constant_pool.h @@ -22,7 +22,7 @@ class BlockOfCode; /// already exists, its
memory location is reused. class ConstantPool final { public: - ConstantPool(BlockOfCode* code, size_t size); + ConstantPool(BlockOfCode& code, size_t size); Xbyak::Address GetConstant(u64 constant); @@ -31,7 +31,7 @@ private: std::map constant_info; - BlockOfCode* code; + BlockOfCode& code; size_t pool_size; u8* pool_begin; u8* current_pool_ptr; diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 08296684..2875f596 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -31,7 +31,7 @@ void EmitContext::EraseInstruction(IR::Inst* inst) { inst->ClearArgs(); } -EmitX64::EmitX64(BlockOfCode* code) +EmitX64::EmitX64(BlockOfCode& code) : code(code) {} EmitX64::~EmitX64() = default; @@ -47,7 +47,7 @@ void EmitX64::EmitVoid(EmitContext&, IR::Inst*) { } void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) { - code->int3(); + code.int3(); } void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) { @@ -63,21 +63,21 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I auto iter = block_descriptors.find(target); CodePtr target_code_ptr = iter != block_descriptors.end() ? iter->second.entrypoint - : code->GetReturnFromRunCodeAddress(); + : code.GetReturnFromRunCodeAddress(); - code->mov(index_reg.cvt32(), dword[r15 + code->GetJitStateInfo().offsetof_rsb_ptr]); + code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]); - code->mov(loc_desc_reg, target.Value()); + code.mov(loc_desc_reg, target.Value()); - patch_information[target].mov_rcx.emplace_back(code->getCurr()); + patch_information[target].mov_rcx.emplace_back(code.getCurr()); EmitPatchMovRcx(target_code_ptr); - code->mov(qword[r15 + index_reg * 8 + code->GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); - code->mov(qword[r15 + index_reg * 8 + code->GetJitStateInfo().offsetof_rsb_codeptrs], rcx); + code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg); + code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx); - code->add(index_reg.cvt32(), 1); - code->and_(index_reg.cvt32(), u32(code->GetJitStateInfo().rsb_ptr_mask)); - code->mov(dword[r15 + code->GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); + code.add(index_reg.cvt32(), 1); + code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); + code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32()); } void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { @@ -125,9 +125,9 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX}); Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - code->cmp(value, 0); - code->lahf(); - code->seto(code->al); + code.cmp(value, 0); + code.lahf(); + code.seto(code.al); ctx.reg_alloc.DefineValue(inst, nzcv); } @@ -141,28 +141,28 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) { value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0; value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0; value |= Common::Bit<28>(args[0].GetImmediateU32()) ? 
(1 << 0) : 0; - code->mov(nzcv, value); + code.mov(nzcv, value); ctx.reg_alloc.DefineValue(inst, nzcv); } else { Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // TODO: Optimize - code->shr(nzcv, 28); - code->imul(nzcv, nzcv, 0b00010000'10000001); - code->and_(nzcv.cvt8(), 1); + code.shr(nzcv, 28); + code.imul(nzcv, nzcv, 0b00010000'10000001); + code.and_(nzcv.cvt8(), 1); ctx.reg_alloc.DefineValue(inst, nzcv); } } void EmitX64::EmitAddCycles(size_t cycles) { ASSERT(cycles < std::numeric_limits::max()); - code->sub(qword[r15 + code->GetJitStateInfo().offsetof_cycles_remaining], static_cast(cycles)); + code.sub(qword[r15 + code.GetJitStateInfo().offsetof_cycles_remaining], static_cast(cycles)); } Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { Xbyak::Label label; const Xbyak::Reg32 cpsr = eax; - code->mov(cpsr, dword[r15 + code->GetJitStateInfo().offsetof_CPSR_nzcv]); + code.mov(cpsr, dword[r15 + code.GetJitStateInfo().offsetof_CPSR_nzcv]); constexpr size_t n_shift = 31; constexpr size_t z_shift = 30; @@ -175,91 +175,91 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) { switch (cond) { case IR::Cond::EQ: //z - code->test(cpsr, z_mask); - code->jnz(label); + code.test(cpsr, z_mask); + code.jnz(label); break; case IR::Cond::NE: //!z - code->test(cpsr, z_mask); - code->jz(label); + code.test(cpsr, z_mask); + code.jz(label); break; case IR::Cond::CS: //c - code->test(cpsr, c_mask); - code->jnz(label); + code.test(cpsr, c_mask); + code.jnz(label); break; case IR::Cond::CC: //!c - code->test(cpsr, c_mask); - code->jz(label); + code.test(cpsr, c_mask); + code.jz(label); break; case IR::Cond::MI: //n - code->test(cpsr, n_mask); - code->jnz(label); + code.test(cpsr, n_mask); + code.jnz(label); break; case IR::Cond::PL: //!n - code->test(cpsr, n_mask); - code->jz(label); + code.test(cpsr, n_mask); + code.jz(label); break; case IR::Cond::VS: //v - code->test(cpsr, v_mask); - code->jnz(label); + code.test(cpsr, v_mask); + code.jnz(label); break; case IR::Cond::VC: //!v - code->test(cpsr, v_mask); - code->jz(label); + code.test(cpsr, v_mask); + code.jz(label); break; case IR::Cond::HI: { //c & !z - code->and_(cpsr, z_mask | c_mask); - code->cmp(cpsr, c_mask); - code->je(label); + code.and_(cpsr, z_mask | c_mask); + code.cmp(cpsr, c_mask); + code.je(label); break; } case IR::Cond::LS: { //!c | z - code->and_(cpsr, z_mask | c_mask); - code->cmp(cpsr, c_mask); - code->jne(label); + code.and_(cpsr, z_mask | c_mask); + code.cmp(cpsr, c_mask); + code.jne(label); break; } case IR::Cond::GE: { // n == v - code->and_(cpsr, n_mask | v_mask); - code->jz(label); - code->cmp(cpsr, n_mask | v_mask); - code->je(label); + code.and_(cpsr, n_mask | v_mask); + code.jz(label); + code.cmp(cpsr, n_mask | v_mask); + code.je(label); break; } case IR::Cond::LT: { // n != v Xbyak::Label fail; - code->and_(cpsr, n_mask | v_mask); - code->jz(fail); - code->cmp(cpsr, n_mask | v_mask); - code->jne(label); - code->L(fail); + code.and_(cpsr, n_mask | v_mask); + code.jz(fail); + code.cmp(cpsr, n_mask | v_mask); + code.jne(label); + code.L(fail); break; } case IR::Cond::GT: { // !z & (n == v) const Xbyak::Reg32 tmp1 = ebx; const Xbyak::Reg32 tmp2 = esi; - code->mov(tmp1, cpsr); - code->mov(tmp2, cpsr); - code->shr(tmp1, n_shift); - code->shr(tmp2, v_shift); - code->shr(cpsr, z_shift); - code->xor_(tmp1, tmp2); - code->or_(tmp1, cpsr); - code->test(tmp1, 1); - code->jz(label); + code.mov(tmp1, cpsr); + code.mov(tmp2, cpsr); + code.shr(tmp1, n_shift); + code.shr(tmp2, v_shift); + code.shr(cpsr, z_shift); + 
code.xor_(tmp1, tmp2); + code.or_(tmp1, cpsr); + code.test(tmp1, 1); + code.jz(label); break; } case IR::Cond::LE: { // z | (n != v) const Xbyak::Reg32 tmp1 = ebx; const Xbyak::Reg32 tmp2 = esi; - code->mov(tmp1, cpsr); - code->mov(tmp2, cpsr); - code->shr(tmp1, n_shift); - code->shr(tmp2, v_shift); - code->shr(cpsr, z_shift); - code->xor_(tmp1, tmp2); - code->or_(tmp1, cpsr); - code->test(tmp1, 1); - code->jnz(label); + code.mov(tmp1, cpsr); + code.mov(tmp2, cpsr); + code.shr(tmp1, n_shift); + code.shr(tmp2, v_shift); + code.shr(cpsr, z_shift); + code.xor_(tmp1, tmp2); + code.or_(tmp1, cpsr); + code.test(tmp1, 1); + code.jnz(label); break; } default: @@ -281,7 +281,7 @@ void EmitX64::EmitCondPrelude(const IR::Block& block) { Xbyak::Label pass = EmitCond(block.GetCondition()); EmitAddCycles(block.ConditionFailedCycleCount()); EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location()); - code->L(pass); + code.L(pass); } void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) { @@ -296,25 +296,25 @@ void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial } void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) { - const CodePtr save_code_ptr = code->getCurr(); + const CodePtr save_code_ptr = code.getCurr(); const PatchInformation& patch_info = patch_information[desc]; for (CodePtr location : patch_info.jg) { - code->SetCodePtr(location); + code.SetCodePtr(location); EmitPatchJg(desc, bb); } for (CodePtr location : patch_info.jmp) { - code->SetCodePtr(location); + code.SetCodePtr(location); EmitPatchJmp(desc, bb); } for (CodePtr location : patch_info.mov_rcx) { - code->SetCodePtr(location); + code.SetCodePtr(location); EmitPatchMovRcx(bb); } - code->SetCodePtr(save_code_ptr); + code.SetCodePtr(save_code_ptr); } void EmitX64::Unpatch(const IR::LocationDescriptor& desc) { diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h index 6e9e47bb..e19ec810 100644 --- a/src/backend_x64/emit_x64.h +++ b/src/backend_x64/emit_x64.h @@ -48,7 +48,7 @@ public: size_t size; // Length in bytes of emitted code }; - EmitX64(BlockOfCode* code); + EmitX64(BlockOfCode& code); virtual ~EmitX64(); /// Looks up an emitted host block in the cache. 
@@ -100,7 +100,7 @@ protected: virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0; // State - BlockOfCode* code; + BlockOfCode& code; std::unordered_map block_descriptors; std::unordered_map patch_information; }; diff --git a/src/backend_x64/emit_x64_aes.cpp b/src/backend_x64/emit_x64_aes.cpp index 7dd2d706..efc1071b 100644 --- a/src/backend_x64/emit_x64_aes.cpp +++ b/src/backend_x64/emit_x64_aes.cpp @@ -44,21 +44,21 @@ static void EmitMixColumns(std::array args, EmitContext& ctx, Block void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tAESNI)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAESNI)) { const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->aesimc(result, operand); + code.aesimc(result, operand); ctx.reg_alloc.DefineValue(inst, result); } else { - EmitMixColumns(args, ctx, *code, inst, Common::InverseMixColumns); + EmitMixColumns(args, ctx, code, inst, Common::InverseMixColumns); } } void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - EmitMixColumns(args, ctx, *code, inst, Common::MixColumns); + EmitMixColumns(args, ctx, code, inst, Common::MixColumns); } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/emit_x64_crc32.cpp b/src/backend_x64/emit_x64_crc32.cpp index c1f4cb28..71b6347e 100644 --- a/src/backend_x64/emit_x64_crc32.cpp +++ b/src/backend_x64/emit_x64_crc32.cpp @@ -42,35 +42,35 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co } void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32Castagnoli(*code, ctx, inst, 8); + EmitCRC32Castagnoli(code, ctx, inst, 8); } void EmitX64::EmitCRC32Castagnoli16(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32Castagnoli(*code, ctx, inst, 16); + EmitCRC32Castagnoli(code, ctx, inst, 16); } void EmitX64::EmitCRC32Castagnoli32(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32Castagnoli(*code, ctx, inst, 32); + EmitCRC32Castagnoli(code, ctx, inst, 32); } void EmitX64::EmitCRC32Castagnoli64(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32Castagnoli(*code, ctx, inst, 64); + EmitCRC32Castagnoli(code, ctx, inst, 64); } void EmitX64::EmitCRC32ISO8(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32ISO(*code, ctx, inst, 8); + EmitCRC32ISO(code, ctx, inst, 8); } void EmitX64::EmitCRC32ISO16(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32ISO(*code, ctx, inst, 16); + EmitCRC32ISO(code, ctx, inst, 16); } void EmitX64::EmitCRC32ISO32(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32ISO(*code, ctx, inst, 32); + EmitCRC32ISO(code, ctx, inst, 32); } void EmitX64::EmitCRC32ISO64(EmitContext& ctx, IR::Inst* inst) { - EmitCRC32ISO(*code, ctx, inst, 64); + EmitCRC32ISO(code, ctx, inst, 64); } } // namespace Dynarmic::BackendX64 diff --git a/src/backend_x64/emit_x64_data_processing.cpp b/src/backend_x64/emit_x64_data_processing.cpp index abfbb2b1..bf595811 100644 --- a/src/backend_x64/emit_x64_data_processing.cpp +++ b/src/backend_x64/emit_x64_data_processing.cpp @@ -21,9 +21,9 @@ void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 lo = ctx.reg_alloc.UseScratchGpr(args[0]); Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(args[1]); - code->shl(hi, 32); - code->mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits - code->or_(lo, hi); + code.shl(hi, 32); + code.mov(lo.cvt32(), lo.cvt32()); // Zero 
extend to 64-bits + code.or_(lo, hi); ctx.reg_alloc.DefineValue(inst, lo); } @@ -38,11 +38,11 @@ void EmitX64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->shr(result, 32); + code.shr(result, 32); if (carry_inst) { Xbyak::Reg64 carry = ctx.reg_alloc.ScratchGpr(); - code->setc(carry.cvt8()); + code.setc(carry.cvt8()); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); } @@ -64,7 +64,7 @@ void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // TODO: Flag optimization - code->shr(result, 31); + code.shr(result, 31); ctx.reg_alloc.DefineValue(inst, result); } @@ -72,9 +72,9 @@ void EmitX64::EmitIsZero32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // TODO: Flag optimization - code->test(result, result); - code->sete(result.cvt8()); - code->movzx(result, result.cvt8()); + code.test(result, result); + code.sete(result.cvt8()); + code.movzx(result, result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -82,9 +82,9 @@ void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization - code->test(result, result); - code->sete(result.cvt8()); - code->movzx(result, result.cvt8()); + code.test(result, result); + code.sete(result.cvt8()); + code.movzx(result, result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } @@ -93,73 +93,73 @@ void EmitX64::EmitTestBit(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); ASSERT(args[1].IsImmediate()); // TODO: Flag optimization - code->bt(result, args[1].GetImmediateU8()); - code->setc(result.cvt8()); + code.bt(result, args[1].GetImmediateU8()); + code.setc(result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } -static void EmitConditionalSelect(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bitsize) { +static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bitsize) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX}).cvt32(); Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize); Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize); - code->mov(nzcv, dword[r15 + code->GetJitStateInfo().offsetof_CPSR_nzcv]); + code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_CPSR_nzcv]); // TODO: Flag optimization - code->shr(nzcv, 28); - code->imul(nzcv, nzcv, 0b00010000'10000001); - code->and_(nzcv.cvt8(), 1); - code->add(nzcv.cvt8(), 0x7F); // restore OF - code->sahf(); // restore SF, ZF, CF + code.shr(nzcv, 28); + code.imul(nzcv, nzcv, 0b00010000'10000001); + code.and_(nzcv.cvt8(), 1); + code.add(nzcv.cvt8(), 0x7F); // restore OF + code.sahf(); // restore SF, ZF, CF switch (args[0].GetImmediateCond()) { case IR::Cond::EQ: //z - code->cmovz(else_, then_); + code.cmovz(else_, then_); break; case IR::Cond::NE: //!z - code->cmovnz(else_, then_); + code.cmovnz(else_, then_); break; case IR::Cond::CS: //c - code->cmovc(else_, then_); + code.cmovc(else_, then_); break; case IR::Cond::CC: //!c - code->cmovnc(else_, then_); + code.cmovnc(else_, 
then_); break; case IR::Cond::MI: //n - code->cmovs(else_, then_); + code.cmovs(else_, then_); break; case IR::Cond::PL: //!n - code->cmovns(else_, then_); + code.cmovns(else_, then_); break; case IR::Cond::VS: //v - code->cmovo(else_, then_); + code.cmovo(else_, then_); break; case IR::Cond::VC: //!v - code->cmovno(else_, then_); + code.cmovno(else_, then_); break; case IR::Cond::HI: //c & !z - code->cmc(); - code->cmova(else_, then_); + code.cmc(); + code.cmova(else_, then_); break; case IR::Cond::LS: //!c | z - code->cmc(); - code->cmovna(else_, then_); + code.cmc(); + code.cmovna(else_, then_); break; case IR::Cond::GE: // n == v - code->cmovge(else_, then_); + code.cmovge(else_, then_); break; case IR::Cond::LT: // n != v - code->cmovl(else_, then_); + code.cmovl(else_, then_); break; case IR::Cond::GT: // !z & (n == v) - code->cmovg(else_, then_); + code.cmovg(else_, then_); break; case IR::Cond::LE: // z | (n != v) - code->cmovle(else_, then_); + code.cmovle(else_, then_); break; case IR::Cond::AL: case IR::Cond::NV: - code->mov(else_, then_); + code.mov(else_, then_); break; default: ASSERT_MSG(false, "Invalid cond {}", static_cast(args[0].GetImmediateCond())); @@ -189,11 +189,11 @@ static void EmitExtractRegister(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } void EmitX64::EmitExtractRegister32(Dynarmic::BackendX64::EmitContext& ctx, IR::Inst* inst) { - EmitExtractRegister(*code, ctx, inst, 32); + EmitExtractRegister(code, ctx, inst, 32); } void EmitX64::EmitExtractRegister64(Dynarmic::BackendX64::EmitContext& ctx, IR::Inst* inst) { - EmitExtractRegister(*code, ctx, inst, 64); + EmitExtractRegister(code, ctx, inst, 64); } void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { @@ -212,9 +212,9 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { - code->shl(result, shift); + code.shl(result, shift); } else { - code->xor_(result, result); + code.xor_(result, result); } ctx.reg_alloc.DefineValue(inst, result); @@ -226,10 +226,10 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { // The 32-bit x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. - code->shl(result, code->cl); - code->xor_(zero, zero); - code->cmp(code->cl, 32); - code->cmovnb(result, zero); + code.shl(result, code.cl); + code.xor_(zero, zero); + code.cmp(code.cl, 32); + code.cmovnb(result, zero); ctx.reg_alloc.DefineValue(inst, result); } @@ -242,16 +242,16 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { if (shift == 0) { // There is nothing more to do. } else if (shift < 32) { - code->bt(carry.cvt32(), 0); - code->shl(result, shift); - code->setc(carry.cvt8()); + code.bt(carry.cvt32(), 0); + code.shl(result, shift); + code.setc(carry.cvt8()); } else if (shift > 32) { - code->xor_(result, result); - code->xor_(carry, carry); + code.xor_(result, result); + code.xor_(carry, carry); } else { - code->mov(carry, result); - code->xor_(result, result); - code->and_(carry, 1); + code.mov(carry, result); + code.xor_(result, result); + code.and_(carry, 1); } ctx.reg_alloc.DefineValue(carry_inst, carry); @@ -264,30 +264,30 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { // TODO: Optimize this. 
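The local-label block that follows implements the A32 LSL-by-register rules, including the carry-out for shift amounts of exactly 32 and above. As a plain reference for those rules (an illustrative helper written for this note, not code from this patch):

#include <cstdint>
#include <utility>

// Returns {result, carry_out} for A32 LSL by a register-supplied amount.
// When the amount is zero the old carry is preserved, which is why the emitted
// code seeds the carry flag with `bt carry, 0` before the shift.
std::pair<std::uint32_t, bool> LogicalShiftLeft32(std::uint32_t value, std::uint8_t amount, bool carry_in) {
    if (amount == 0) return {value, carry_in};
    if (amount < 32) return {value << amount, ((value >> (32 - amount)) & 1) != 0};
    if (amount == 32) return {0, (value & 1) != 0};
    return {0, false}; // amounts above 32 shift both the result and the carry to zero
}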
- code->inLocalLabel(); + code.inLocalLabel(); - code->cmp(code->cl, 32); - code->ja(".Rs_gt32"); - code->je(".Rs_eq32"); + code.cmp(code.cl, 32); + code.ja(".Rs_gt32"); + code.je(".Rs_eq32"); // if (Rs & 0xFF < 32) { - code->bt(carry.cvt32(), 0); // Set the carry flag for correct behaviour in the case when Rs & 0xFF == 0 - code->shl(result, code->cl); - code->setc(carry.cvt8()); - code->jmp(".end"); + code.bt(carry.cvt32(), 0); // Set the carry flag for correct behaviour in the case when Rs & 0xFF == 0 + code.shl(result, code.cl); + code.setc(carry.cvt8()); + code.jmp(".end"); // } else if (Rs & 0xFF > 32) { - code->L(".Rs_gt32"); - code->xor_(result, result); - code->xor_(carry, carry); - code->jmp(".end"); + code.L(".Rs_gt32"); + code.xor_(result, result); + code.xor_(carry, carry); + code.jmp(".end"); // } else if (Rs & 0xFF == 32) { - code->L(".Rs_eq32"); - code->mov(carry, result); - code->and_(carry, 1); - code->xor_(result, result); + code.L(".Rs_eq32"); + code.mov(carry, result); + code.and_(carry, 1); + code.xor_(result, result); // } - code->L(".end"); + code.L(".end"); - code->outLocalLabel(); + code.outLocalLabel(); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); @@ -306,9 +306,9 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); if (shift < 64) { - code->shl(result, shift); + code.shl(result, shift); } else { - code->xor_(result.cvt32(), result.cvt32()); + code.xor_(result.cvt32(), result.cvt32()); } ctx.reg_alloc.DefineValue(inst, result); @@ -320,10 +320,10 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { // The x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. - code->shl(result, code->cl); - code->xor_(zero.cvt32(), zero.cvt32()); - code->cmp(code->cl, 64); - code->cmovnb(result, zero); + code.shl(result, code.cl); + code.xor_(zero.cvt32(), zero.cvt32()); + code.cmp(code.cl, 64); + code.cmovnb(result, zero); ctx.reg_alloc.DefineValue(inst, result); } @@ -343,9 +343,9 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { - code->shr(result, shift); + code.shr(result, shift); } else { - code->xor_(result, result); + code.xor_(result, result); } ctx.reg_alloc.DefineValue(inst, result); @@ -357,10 +357,10 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { // The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. - code->shr(result, code->cl); - code->xor_(zero, zero); - code->cmp(code->cl, 32); - code->cmovnb(result, zero); + code.shr(result, code.cl); + code.xor_(zero, zero); + code.cmp(code.cl, 32); + code.cmovnb(result, zero); ctx.reg_alloc.DefineValue(inst, result); } @@ -373,15 +373,15 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { if (shift == 0) { // There is nothing more to do. 
} else if (shift < 32) { - code->shr(result, shift); - code->setc(carry.cvt8()); + code.shr(result, shift); + code.setc(carry.cvt8()); } else if (shift == 32) { - code->bt(result, 31); - code->setc(carry.cvt8()); - code->mov(result, 0); + code.bt(result, 31); + code.setc(carry.cvt8()); + code.mov(result, 0); } else { - code->xor_(result, result); - code->xor_(carry, carry); + code.xor_(result, result); + code.xor_(carry, carry); } ctx.reg_alloc.DefineValue(carry_inst, carry); @@ -394,32 +394,32 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { // TODO: Optimize this. - code->inLocalLabel(); + code.inLocalLabel(); - code->cmp(code->cl, 32); - code->ja(".Rs_gt32"); - code->je(".Rs_eq32"); + code.cmp(code.cl, 32); + code.ja(".Rs_gt32"); + code.je(".Rs_eq32"); // if (Rs & 0xFF == 0) goto end; - code->test(code->cl, code->cl); - code->jz(".end"); + code.test(code.cl, code.cl); + code.jz(".end"); // if (Rs & 0xFF < 32) { - code->shr(result, code->cl); - code->setc(carry.cvt8()); - code->jmp(".end"); + code.shr(result, code.cl); + code.setc(carry.cvt8()); + code.jmp(".end"); // } else if (Rs & 0xFF > 32) { - code->L(".Rs_gt32"); - code->xor_(result, result); - code->xor_(carry, carry); - code->jmp(".end"); + code.L(".Rs_gt32"); + code.xor_(result, result); + code.xor_(carry, carry); + code.jmp(".end"); // } else if (Rs & 0xFF == 32) { - code->L(".Rs_eq32"); - code->bt(result, 31); - code->setc(carry.cvt8()); - code->xor_(result, result); + code.L(".Rs_eq32"); + code.bt(result, 31); + code.setc(carry.cvt8()); + code.xor_(result, result); // } - code->L(".end"); + code.L(".end"); - code->outLocalLabel(); + code.outLocalLabel(); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); @@ -438,9 +438,9 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); if (shift < 64) { - code->shr(result, shift); + code.shr(result, shift); } else { - code->xor_(result.cvt32(), result.cvt32()); + code.xor_(result.cvt32(), result.cvt32()); } ctx.reg_alloc.DefineValue(inst, result); @@ -452,10 +452,10 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { // The x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. - code->shr(result, code->cl); - code->xor_(zero.cvt32(), zero.cvt32()); - code->cmp(code->cl, 64); - code->cmovnb(result, zero); + code.shr(result, code.cl); + code.xor_(zero.cvt32(), zero.cvt32()); + code.cmp(code.cl, 64); + code.cmovnb(result, zero); ctx.reg_alloc.DefineValue(inst, result); } @@ -474,7 +474,7 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - code->sar(result, u8(shift < 31 ? shift : 31)); + code.sar(result, u8(shift < 31 ? shift : 31)); ctx.reg_alloc.DefineValue(inst, result); } else { @@ -486,11 +486,11 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { // ARM differs from the behaviour: It does not mask the count. // We note that all shift values above 31 have the same behaviour as 31 does, so we saturate `shift` to 31. 
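As the comment notes, A32 ASR does not mask the shift amount; every amount of 32 or more simply fills the result with copies of the sign bit, which is the same result an arithmetic shift by 31 gives. A reference version of the semantics being emulated (illustrative only; it assumes the usual arithmetic right shift of signed values, which C++20 guarantees and mainstream compilers have always provided):

#include <cstdint>

std::uint32_t ArithmeticShiftRight32(std::uint32_t value, std::uint8_t amount) {
    const std::uint8_t n = amount < 31 ? amount : 31; // saturating at 31 is equivalent for all larger amounts
    return static_cast<std::uint32_t>(static_cast<std::int32_t>(value) >> n);
}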
- code->mov(const31, 31); - code->movzx(code->ecx, code->cl); - code->cmp(code->ecx, u32(31)); - code->cmovg(code->ecx, const31); - code->sar(result, code->cl); + code.mov(const31, 31); + code.movzx(code.ecx, code.cl); + code.cmp(code.ecx, u32(31)); + code.cmovg(code.ecx, const31); + code.sar(result, code.cl); ctx.reg_alloc.DefineValue(inst, result); } @@ -503,12 +503,12 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { if (shift == 0) { // There is nothing more to do. } else if (shift <= 31) { - code->sar(result, shift); - code->setc(carry); + code.sar(result, shift); + code.setc(carry); } else { - code->sar(result, 31); - code->bt(result, 31); - code->setc(carry); + code.sar(result, 31); + code.bt(result, 31); + code.setc(carry); } ctx.reg_alloc.DefineValue(carry_inst, carry); @@ -521,26 +521,26 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { // TODO: Optimize this. - code->inLocalLabel(); + code.inLocalLabel(); - code->cmp(code->cl, u32(31)); - code->ja(".Rs_gt31"); + code.cmp(code.cl, u32(31)); + code.ja(".Rs_gt31"); // if (Rs & 0xFF == 0) goto end; - code->test(code->cl, code->cl); - code->jz(".end"); + code.test(code.cl, code.cl); + code.jz(".end"); // if (Rs & 0xFF <= 31) { - code->sar(result, code->cl); - code->setc(carry); - code->jmp(".end"); + code.sar(result, code.cl); + code.setc(carry); + code.jmp(".end"); // } else if (Rs & 0xFF > 31) { - code->L(".Rs_gt31"); - code->sar(result, 31); // 31 produces the same results as anything above 31 - code->bt(result, 31); - code->setc(carry); + code.L(".Rs_gt31"); + code.sar(result, 31); // 31 produces the same results as anything above 31 + code.bt(result, 31); + code.setc(carry); // } - code->L(".end"); + code.L(".end"); - code->outLocalLabel(); + code.outLocalLabel(); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); @@ -558,7 +558,7 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); - code->sar(result, u8(shift < 63 ? shift : 63)); + code.sar(result, u8(shift < 63 ? shift : 63)); ctx.reg_alloc.DefineValue(inst, result); } else { @@ -570,11 +570,11 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { // ARM differs from the behaviour: It does not mask the count. // We note that all shift values above 63 have the same behaviour as 63 does, so we saturate `shift` to 63. - code->mov(const63, 63); - code->movzx(code->ecx, code->cl); - code->cmp(code->ecx, u32(63)); - code->cmovg(code->ecx, const63); - code->sar(result, code->cl); + code.mov(const63, 63); + code.movzx(code.ecx, code.cl); + code.cmp(code.ecx, u32(63)); + code.cmovg(code.ecx, const63); + code.sar(result, code.cl); ctx.reg_alloc.DefineValue(inst, result); } @@ -593,7 +593,7 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - code->ror(result, u8(shift & 0x1F)); + code.ror(result, u8(shift & 0x1F)); ctx.reg_alloc.DefineValue(inst, result); } else { @@ -601,7 +601,7 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); // x64 ROR instruction does (shift & 0x1F) for us. 
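Only the rotate amount modulo 32 affects a 32-bit rotate's result, so letting the x64 ROR mask the count with 0x1F already matches the A32 result (the carry-out special case for non-zero multiples of 32 is handled separately below). A reference rotate, as an illustrative helper rather than code from this patch:

#include <cstdint>

std::uint32_t RotateRight32(std::uint32_t value, std::uint8_t amount) {
    const std::uint8_t n = amount & 0x1F;             // the same masking the hardware ROR applies
    return n == 0 ? value : (value >> n) | (value << (32 - n));
}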
- code->ror(result, code->cl); + code.ror(result, code.cl); ctx.reg_alloc.DefineValue(inst, result); } @@ -614,11 +614,11 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { if (shift == 0) { // There is nothing more to do. } else if ((shift & 0x1F) == 0) { - code->bt(result, u8(31)); - code->setc(carry); + code.bt(result, u8(31)); + code.setc(carry); } else { - code->ror(result, shift); - code->setc(carry); + code.ror(result, shift); + code.setc(carry); } ctx.reg_alloc.DefineValue(carry_inst, carry); @@ -631,26 +631,26 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { // TODO: Optimize - code->inLocalLabel(); + code.inLocalLabel(); // if (Rs & 0xFF == 0) goto end; - code->test(code->cl, code->cl); - code->jz(".end"); + code.test(code.cl, code.cl); + code.jz(".end"); - code->and_(code->ecx, u32(0x1F)); - code->jz(".zero_1F"); + code.and_(code.ecx, u32(0x1F)); + code.jz(".zero_1F"); // if (Rs & 0x1F != 0) { - code->ror(result, code->cl); - code->setc(carry); - code->jmp(".end"); + code.ror(result, code.cl); + code.setc(carry); + code.jmp(".end"); // } else { - code->L(".zero_1F"); - code->bt(result, u8(31)); - code->setc(carry); + code.L(".zero_1F"); + code.bt(result, u8(31)); + code.setc(carry); // } - code->L(".end"); + code.L(".end"); - code->outLocalLabel(); + code.outLocalLabel(); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); @@ -668,7 +668,7 @@ void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) { u8 shift = shift_arg.GetImmediateU8(); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); - code->ror(result, u8(shift & 0x3F)); + code.ror(result, u8(shift & 0x3F)); ctx.reg_alloc.DefineValue(inst, result); } else { @@ -676,7 +676,7 @@ void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); // x64 ROR instruction does (shift & 0x3F) for us. 
- code->ror(result, code->cl); + code.ror(result, code.cl); ctx.reg_alloc.DefineValue(inst, result); } @@ -690,11 +690,11 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(args[1]).cvt8(); - code->bt(carry.cvt32(), 0); - code->rcr(result, 1); + code.bt(carry.cvt32(), 0); + code.rcr(result, 1); if (carry_inst) { - code->setc(carry); + code.setc(carry); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); @@ -713,16 +713,16 @@ static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* ca } } -static Xbyak::Reg64 DoNZCV(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* nzcv_out) { +static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzcv_out) { if (!nzcv_out) return INVALID_REG; Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr({HostLoc::RAX}); - code->xor_(nzcv.cvt32(), nzcv.cvt32()); + code.xor_(nzcv.cvt32(), nzcv.cvt32()); return nzcv; } -static void EmitAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bitsize) { +static void EmitAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bitsize) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); auto nzcv_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetNZCVFromOp); @@ -741,44 +741,44 @@ static void EmitAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit u32 op_arg = args[1].GetImmediateU32(); if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { - code->stc(); - code->adc(result, op_arg); + code.stc(); + code.adc(result, op_arg); } else { - code->add(result, op_arg); + code.add(result, op_arg); } } else { - code->bt(carry.cvt32(), 0); - code->adc(result, op_arg); + code.bt(carry.cvt32(), 0); + code.adc(result, op_arg); } } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(bitsize); if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { - code->stc(); - code->adc(result, *op_arg); + code.stc(); + code.adc(result, *op_arg); } else { - code->add(result, *op_arg); + code.add(result, *op_arg); } } else { - code->bt(carry.cvt32(), 0); - code->adc(result, *op_arg); + code.bt(carry.cvt32(), 0); + code.adc(result, *op_arg); } } if (nzcv_inst) { - code->lahf(); - code->seto(code->al); + code.lahf(); + code.seto(code.al); ctx.reg_alloc.DefineValue(nzcv_inst, nzcv); ctx.EraseInstruction(nzcv_inst); } if (carry_inst) { - code->setc(carry); + code.setc(carry); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); } if (overflow_inst) { - code->seto(overflow); + code.seto(overflow); ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.EraseInstruction(overflow_inst); } @@ -794,7 +794,7 @@ void EmitX64::EmitAdd64(EmitContext& ctx, IR::Inst* inst) { EmitAdd(code, ctx, inst, 64); } -static void EmitSub(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bitsize) { +static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bitsize) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); auto nzcv_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetNZCVFromOp); @@ -815,47 +815,47 @@ static void EmitSub(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, int bit u32 op_arg = 
args[1].GetImmediateU32(); if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { - code->sub(result, op_arg); + code.sub(result, op_arg); } else { - code->stc(); - code->sbb(result, op_arg); + code.stc(); + code.sbb(result, op_arg); } } else { - code->bt(carry.cvt32(), 0); - code->cmc(); - code->sbb(result, op_arg); + code.bt(carry.cvt32(), 0); + code.cmc(); + code.sbb(result, op_arg); } } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(bitsize); if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { - code->sub(result, *op_arg); + code.sub(result, *op_arg); } else { - code->stc(); - code->sbb(result, *op_arg); + code.stc(); + code.sbb(result, *op_arg); } } else { - code->bt(carry.cvt32(), 0); - code->cmc(); - code->sbb(result, *op_arg); + code.bt(carry.cvt32(), 0); + code.cmc(); + code.sbb(result, *op_arg); } } if (nzcv_inst) { - code->cmc(); - code->lahf(); - code->seto(code->al); + code.cmc(); + code.lahf(); + code.seto(code.al); ctx.reg_alloc.DefineValue(nzcv_inst, nzcv); ctx.EraseInstruction(nzcv_inst); } if (carry_inst) { - code->setnc(carry); + code.setnc(carry); ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.EraseInstruction(carry_inst); } if (overflow_inst) { - code->seto(overflow); + code.seto(overflow); ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.EraseInstruction(overflow_inst); } @@ -876,12 +876,12 @@ void EmitX64::EmitMul32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); if (args[1].IsImmediate()) { - code->imul(result, result, args[1].GetImmediateU32()); + code.imul(result, result, args[1].GetImmediateU32()); } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); - code->imul(result, *op_arg); + code.imul(result, *op_arg); } ctx.reg_alloc.DefineValue(inst, result); } @@ -892,7 +892,7 @@ void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); - code->imul(result, *op_arg); + code.imul(result, *op_arg); ctx.reg_alloc.DefineValue(inst, result); } @@ -907,13 +907,13 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Label end; - code->xor_(eax, eax); - code->test(divisor, divisor); - code->jz(end); - code->mov(eax, dividend); - code->xor_(edx, edx); - code->div(divisor); - code->L(end); + code.xor_(eax, eax); + code.test(divisor, divisor); + code.jz(end); + code.mov(eax, dividend); + code.xor_(edx, edx); + code.div(divisor); + code.L(end); ctx.reg_alloc.DefineValue(inst, eax); } @@ -928,13 +928,13 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Label end; - code->xor_(eax, eax); - code->test(divisor, divisor); - code->jz(end); - code->mov(rax, dividend); - code->xor_(edx, edx); - code->div(divisor); - code->L(end); + code.xor_(eax, eax); + code.test(divisor, divisor); + code.jz(end); + code.mov(rax, dividend); + code.xor_(edx, edx); + code.div(divisor); + code.L(end); ctx.reg_alloc.DefineValue(inst, rax); } @@ -949,13 +949,13 @@ void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Label end; - code->xor_(eax, eax); - code->test(divisor, divisor); - code->jz(end); - code->mov(eax, dividend); - code->cdq(); - code->idiv(divisor); - code->L(end); + code.xor_(eax, eax); + code.test(divisor, divisor); + code.jz(end); + code.mov(eax, dividend); + code.cdq(); + code.idiv(divisor); + code.L(end); ctx.reg_alloc.DefineValue(inst, eax); } @@ -970,13 +970,13 @@ void 
EmitX64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Label end; - code->xor_(eax, eax); - code->test(divisor, divisor); - code->jz(end); - code->mov(rax, dividend); - code->cqo(); - code->idiv(divisor); - code->L(end); + code.xor_(eax, eax); + code.test(divisor, divisor); + code.jz(end); + code.mov(rax, dividend); + code.cqo(); + code.idiv(divisor); + code.L(end); ctx.reg_alloc.DefineValue(inst, rax); } @@ -989,12 +989,12 @@ void EmitX64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { u32 op_arg = args[1].GetImmediateU32(); - code->and_(result, op_arg); + code.and_(result, op_arg); } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); - code->and_(result, *op_arg); + code.and_(result, *op_arg); } ctx.reg_alloc.DefineValue(inst, result); @@ -1008,12 +1008,12 @@ void EmitX64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) { if (args[1].FitsInImmediateS32()) { u32 op_arg = u32(args[1].GetImmediateS32()); - code->and_(result, op_arg); + code.and_(result, op_arg); } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(64); - code->and_(result, *op_arg); + code.and_(result, *op_arg); } ctx.reg_alloc.DefineValue(inst, result); @@ -1027,12 +1027,12 @@ void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { u32 op_arg = args[1].GetImmediateU32(); - code->xor_(result, op_arg); + code.xor_(result, op_arg); } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); - code->xor_(result, *op_arg); + code.xor_(result, *op_arg); } ctx.reg_alloc.DefineValue(inst, result); @@ -1046,12 +1046,12 @@ void EmitX64::EmitEor64(EmitContext& ctx, IR::Inst* inst) { if (args[1].FitsInImmediateS32()) { u32 op_arg = u32(args[1].GetImmediateS32()); - code->xor_(result, op_arg); + code.xor_(result, op_arg); } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(64); - code->xor_(result, *op_arg); + code.xor_(result, *op_arg); } ctx.reg_alloc.DefineValue(inst, result); @@ -1065,12 +1065,12 @@ void EmitX64::EmitOr32(EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { u32 op_arg = args[1].GetImmediateU32(); - code->or_(result, op_arg); + code.or_(result, op_arg); } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(32); - code->or_(result, *op_arg); + code.or_(result, *op_arg); } ctx.reg_alloc.DefineValue(inst, result); @@ -1084,12 +1084,12 @@ void EmitX64::EmitOr64(EmitContext& ctx, IR::Inst* inst) { if (args[1].FitsInImmediateS32()) { u32 op_arg = u32(args[1].GetImmediateS32()); - code->or_(result, op_arg); + code.or_(result, op_arg); } else { OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); op_arg.setBit(64); - code->or_(result, *op_arg); + code.or_(result, *op_arg); } ctx.reg_alloc.DefineValue(inst, result); @@ -1101,10 +1101,10 @@ void EmitX64::EmitNot32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result; if (args[0].IsImmediate()) { result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(result, u32(~args[0].GetImmediateU32())); + code.mov(result, u32(~args[0].GetImmediateU32())); } else { result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->not_(result); + code.not_(result); } ctx.reg_alloc.DefineValue(inst, result); } @@ -1115,10 +1115,10 @@ void EmitX64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result; if (args[0].IsImmediate()) { result = ctx.reg_alloc.ScratchGpr(); - code->mov(result, ~args[0].GetImmediateU64()); + code.mov(result, ~args[0].GetImmediateU64()); } else { result = 
ctx.reg_alloc.UseScratchGpr(args[0]); - code->not_(result); + code.not_(result); } ctx.reg_alloc.DefineValue(inst, result); } @@ -1126,70 +1126,70 @@ void EmitX64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitSignExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movsx(result.cvt32(), result.cvt8()); + code.movsx(result.cvt32(), result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movsx(result.cvt32(), result.cvt16()); + code.movsx(result.cvt32(), result.cvt16()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movsx(result.cvt64(), result.cvt8()); + code.movsx(result.cvt64(), result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movsx(result.cvt64(), result.cvt16()); + code.movsx(result.cvt64(), result.cvt16()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitSignExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movsxd(result.cvt64(), result.cvt32()); + code.movsxd(result.cvt64(), result.cvt32()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movzx(result.cvt32(), result.cvt8()); + code.movzx(result.cvt32(), result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movzx(result.cvt32(), result.cvt16()); + code.movzx(result.cvt32(), result.cvt16()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movzx(result.cvt32(), result.cvt8()); // x64 zeros upper 32 bits on a 32-bit move + code.movzx(result.cvt32(), result.cvt8()); // x64 zeros upper 32 bits on a 32-bit move ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->movzx(result.cvt32(), result.cvt16()); // x64 zeros upper 32 bits on a 32-bit move + code.movzx(result.cvt32(), result.cvt16()); // x64 zeros upper 32 bits on a 32-bit move ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move + 
code.mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move ctx.reg_alloc.DefineValue(inst, result); } @@ -1198,11 +1198,11 @@ void EmitX64::EmitZeroExtendLongToQuad(EmitContext& ctx, IR::Inst* inst) { if (args[0].IsInGpr()) { Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - code->movq(result, source); + code.movq(result, source); ctx.reg_alloc.DefineValue(inst, result); } else { Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code->movq(result, result); + code.movq(result, result); ctx.reg_alloc.DefineValue(inst, result); } } @@ -1210,31 +1210,31 @@ void EmitX64::EmitZeroExtendLongToQuad(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitByteReverseWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - code->bswap(result); + code.bswap(result); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseHalf(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg16 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt16(); - code->rol(result, 8); + code.rol(result, 8); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - code->bswap(result); + code.bswap(result); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32(); Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - code->lzcnt(result, source); + code.lzcnt(result, source); ctx.reg_alloc.DefineValue(inst, result); } else { @@ -1242,11 +1242,11 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); // The result of a bsr of zero is undefined, but zf is set after it. - code->bsr(result, source); - code->mov(source, 0xFFFFFFFF); - code->cmovz(result, source); - code->neg(result); - code->add(result, 31); + code.bsr(result, source); + code.mov(source, 0xFFFFFFFF); + code.cmovz(result, source); + code.neg(result); + code.add(result, 31); ctx.reg_alloc.DefineValue(inst, result); } @@ -1254,11 +1254,11 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64(); Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); - code->lzcnt(result, source); + code.lzcnt(result, source); ctx.reg_alloc.DefineValue(inst, result); } else { @@ -1266,11 +1266,11 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); // The result of a bsr of zero is undefined, but zf is set after it. 
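The bsr fallback above works because bsr returns the index of the highest set bit, so clz(x) == 31 - bsr(x); substituting 0xFFFFFFFF (i.e. -1) via cmovz when the input is zero makes clz(0) come out as 32. A scalar sketch of the same computation (illustrative only, not part of the patch):

#include <cstdint>

std::uint32_t CountLeadingZeros32(std::uint32_t x) {
    int highest_set_bit = -1;                 // stands in for the cmovz'd 0xFFFFFFFF
    for (int i = 31; i >= 0; --i) {
        if ((x >> i) & 1) {
            highest_set_bit = i;              // what bsr would report
            break;
        }
    }
    return static_cast<std::uint32_t>(31 - highest_set_bit);   // the neg + add-31 sequence
}

// CountLeadingZeros32(0) == 32, CountLeadingZeros32(0x80000000u) == 0.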
- code->bsr(result, source); - code->mov(source.cvt32(), 0xFFFFFFFF); - code->cmovz(result.cvt32(), source.cvt32()); - code->neg(result.cvt32()); - code->add(result.cvt32(), 63); + code.bsr(result, source); + code.mov(source.cvt32(), 0xFFFFFFFF); + code.cmovz(result.cvt32(), source.cvt32()); + code.neg(result.cvt32()); + code.add(result.cvt32(), 63); ctx.reg_alloc.DefineValue(inst, result); } diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 4cb44b32..392e9986 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -29,96 +29,96 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double -static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { +static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; // We need to report back whether we've found a denormal on input. // SSE doesn't do this for us when SSE's DAZ is enabled. - code->movd(gpr_scratch, xmm_value); - code->and_(gpr_scratch, u32(0x7FFFFFFF)); - code->sub(gpr_scratch, u32(1)); - code->cmp(gpr_scratch, u32(0x007FFFFE)); - code->ja(end); - code->pxor(xmm_value, xmm_value); - code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7)); - code->L(end); + code.movd(gpr_scratch, xmm_value); + code.and_(gpr_scratch, u32(0x7FFFFFFF)); + code.sub(gpr_scratch, u32(1)); + code.cmp(gpr_scratch, u32(0x007FFFFE)); + code.ja(end); + code.pxor(xmm_value, xmm_value); + code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7)); + code.L(end); } -static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { +static void DenormalsAreZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { Xbyak::Label end; - auto mask = code->MConst(f64_non_sign_mask); + auto mask = code.MConst(f64_non_sign_mask); mask.setBit(64); - auto penult_denormal = code->MConst(f64_penultimate_positive_denormal); + auto penult_denormal = code.MConst(f64_penultimate_positive_denormal); penult_denormal.setBit(64); - code->movq(gpr_scratch, xmm_value); - code->and_(gpr_scratch, mask); - code->sub(gpr_scratch, u32(1)); - code->cmp(gpr_scratch, penult_denormal); - code->ja(end); - code->pxor(xmm_value, xmm_value); - code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7)); - code->L(end); + code.movq(gpr_scratch, xmm_value); + code.and_(gpr_scratch, mask); + code.sub(gpr_scratch, u32(1)); + code.cmp(gpr_scratch, penult_denormal); + code.ja(end); + code.pxor(xmm_value, xmm_value); + code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7)); + code.L(end); } -static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { +static void FlushToZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) { Xbyak::Label end; - code->movd(gpr_scratch, xmm_value); - code->and_(gpr_scratch, u32(0x7FFFFFFF)); - code->sub(gpr_scratch, u32(1)); - code->cmp(gpr_scratch, u32(0x007FFFFE)); - code->ja(end); - code->pxor(xmm_value, xmm_value); - code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3)); - code->L(end); + code.movd(gpr_scratch, xmm_value); + code.and_(gpr_scratch, u32(0x7FFFFFFF)); + code.sub(gpr_scratch, 
u32(1)); + code.cmp(gpr_scratch, u32(0x007FFFFE)); + code.ja(end); + code.pxor(xmm_value, xmm_value); + code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3)); + code.L(end); } -static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { +static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { Xbyak::Label end; - auto mask = code->MConst(f64_non_sign_mask); + auto mask = code.MConst(f64_non_sign_mask); mask.setBit(64); - auto penult_denormal = code->MConst(f64_penultimate_positive_denormal); + auto penult_denormal = code.MConst(f64_penultimate_positive_denormal); penult_denormal.setBit(64); - code->movq(gpr_scratch, xmm_value); - code->and_(gpr_scratch, mask); - code->sub(gpr_scratch, u32(1)); - code->cmp(gpr_scratch, penult_denormal); - code->ja(end); - code->pxor(xmm_value, xmm_value); - code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3)); - code->L(end); + code.movq(gpr_scratch, xmm_value); + code.and_(gpr_scratch, mask); + code.sub(gpr_scratch, u32(1)); + code.cmp(gpr_scratch, penult_denormal); + code.ja(end); + code.pxor(xmm_value, xmm_value); + code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3)); + code.L(end); } -static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) { +static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) { Xbyak::Label end; - code->ucomiss(xmm_value, xmm_value); - code->jnp(end); - code->movaps(xmm_value, code->MConst(f32_nan)); - code->L(end); + code.ucomiss(xmm_value, xmm_value); + code.jnp(end); + code.movaps(xmm_value, code.MConst(f32_nan)); + code.L(end); } -static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) { +static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) { Xbyak::Label end; - code->ucomisd(xmm_value, xmm_value); - code->jnp(end); - code->movaps(xmm_value, code->MConst(f64_nan)); - code->L(end); + code.ucomisd(xmm_value, xmm_value); + code.jnp(end); + code.movaps(xmm_value, code.MConst(f64_nan)); + code.L(end); } -static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { - code->pxor(xmm_scratch, xmm_scratch); - code->cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) - code->pand(xmm_value, xmm_scratch); +static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) { + code.pxor(xmm_scratch, xmm_scratch); + code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN) + code.pand(xmm_value, xmm_scratch); } -static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { +static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -129,7 +129,7 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi DenormalsAreZero32(code, result, gpr_scratch); DenormalsAreZero32(code, operand, gpr_scratch); } - (code->*fn)(result, operand); + (code.*fn)(result, operand); if (ctx.FPSCR_FTZ()) { FlushToZero32(code, result, gpr_scratch); } @@ -140,7 +140,7 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi ctx.reg_alloc.DefineValue(inst, result); } -static void FPThreeOp64(BlockOfCode* code, 
EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { +static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -151,7 +151,7 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi DenormalsAreZero64(code, result, gpr_scratch); DenormalsAreZero64(code, operand, gpr_scratch); } - (code->*fn)(result, operand); + (code.*fn)(result, operand); if (ctx.FPSCR_FTZ()) { FlushToZero64(code, result, gpr_scratch); } @@ -162,7 +162,7 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi ctx.reg_alloc.DefineValue(inst, result); } -static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { +static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -172,7 +172,7 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void DenormalsAreZero32(code, result, gpr_scratch); } - (code->*fn)(result, result); + (code.*fn)(result, result); if (ctx.FPSCR_FTZ()) { FlushToZero32(code, result, gpr_scratch); } @@ -183,7 +183,7 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void ctx.reg_alloc.DefineValue(inst, result); } -static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { +static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); @@ -193,7 +193,7 @@ static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void DenormalsAreZero64(code, result, gpr_scratch); } - (code->*fn)(result, result); + (code.*fn)(result, result); if (ctx.FPSCR_FTZ()) { FlushToZero64(code, result, gpr_scratch); } @@ -208,7 +208,7 @@ void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pand(result, code->MConst(f32_non_sign_mask)); + code.pand(result, code.MConst(f32_non_sign_mask)); ctx.reg_alloc.DefineValue(inst, result); } @@ -217,7 +217,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pand(result, code->MConst(f64_non_sign_mask)); + code.pand(result, code.MConst(f64_non_sign_mask)); ctx.reg_alloc.DefineValue(inst, result); } @@ -226,7 +226,7 @@ void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pxor(result, code->MConst(f32_negative_zero)); + code.pxor(result, code.MConst(f32_negative_zero)); ctx.reg_alloc.DefineValue(inst, result); } @@ -235,7 +235,7 @@ void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm result = 
ctx.reg_alloc.UseScratchXmm(args[0]); - code->pxor(result, code->MConst(f64_negative_zero)); + code.pxor(result, code.MConst(f64_negative_zero)); ctx.reg_alloc.DefineValue(inst, result); } @@ -280,16 +280,16 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) { FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd); } -static void SetFpscrNzcvFromFlags(BlockOfCode* code, EmitContext& ctx) { +static void SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(nzcv, 0x28630000); - code->sete(cl); - code->rcl(cl, 3); - code->shl(nzcv, cl); - code->and_(nzcv, 0xF0000000); - code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv); + code.mov(nzcv, 0x28630000); + code.sete(cl); + code.rcl(cl, 3); + code.shl(nzcv, cl); + code.and_(nzcv, 0xF0000000); + code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv); } void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { @@ -299,9 +299,9 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { bool exc_on_qnan = args[2].GetImmediateU1(); if (exc_on_qnan) { - code->comiss(reg_a, reg_b); + code.comiss(reg_a, reg_b); } else { - code->ucomiss(reg_a, reg_b); + code.ucomiss(reg_a, reg_b); } SetFpscrNzcvFromFlags(code, ctx); @@ -314,9 +314,9 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { bool exc_on_qnan = args[2].GetImmediateU1(); if (exc_on_qnan) { - code->comisd(reg_a, reg_b); + code.comisd(reg_a, reg_b); } else { - code->ucomisd(reg_a, reg_b); + code.ucomisd(reg_a, reg_b); } SetFpscrNzcvFromFlags(code, ctx); @@ -330,7 +330,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { if (ctx.FPSCR_FTZ()) { DenormalsAreZero32(code, result, gpr_scratch.cvt32()); } - code->cvtss2sd(result, result); + code.cvtss2sd(result, result); if (ctx.FPSCR_FTZ()) { FlushToZero64(code, result, gpr_scratch); } @@ -349,7 +349,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { if (ctx.FPSCR_FTZ()) { DenormalsAreZero64(code, result, gpr_scratch); } - code->cvtsd2ss(result, result); + code.cvtsd2ss(result, result); if (ctx.FPSCR_FTZ()) { FlushToZero32(code, result, gpr_scratch.cvt32()); } @@ -373,22 +373,22 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) { if (ctx.FPSCR_FTZ()) { DenormalsAreZero32(code, from, to); } - code->cvtss2sd(from, from); + code.cvtss2sd(from, from); // First time is to set flags if (round_towards_zero) { - code->cvttsd2si(to, from); // 32 bit gpr + code.cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(to, from); // 32 bit gpr + code.cvtsd2si(to, from); // 32 bit gpr } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code->minsd(from, code->MConst(f64_max_s32)); - code->maxsd(from, code->MConst(f64_min_s32)); + code.minsd(from, code.MConst(f64_max_s32)); + code.maxsd(from, code.MConst(f64_min_s32)); // Second time is for real if (round_towards_zero) { - code->cvttsd2si(to, from); // 32 bit gpr + code.cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(to, from); // 32 bit gpr + code.cvtsd2si(to, from); // 32 bit gpr } ctx.reg_alloc.DefineValue(inst, to); @@ -412,19 +412,19 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { if (ctx.FPSCR_FTZ()) { DenormalsAreZero32(code, from, to); } - code->cvtss2sd(from, from); + code.cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); // Bring into SSE range - 
code->addsd(from, code->MConst(f64_min_s32)); + code.addsd(from, code.MConst(f64_min_s32)); // First time is to set flags - code->cvtsd2si(to, from); // 32 bit gpr + code.cvtsd2si(to, from); // 32 bit gpr // Clamp to output range - code->minsd(from, code->MConst(f64_max_s32)); - code->maxsd(from, code->MConst(f64_min_s32)); + code.minsd(from, code.MConst(f64_max_s32)); + code.maxsd(from, code.MConst(f64_min_s32)); // Actually convert - code->cvtsd2si(to, from); // 32 bit gpr + code.cvtsd2si(to, from); // 32 bit gpr // Bring back into original range - code->add(to, u32(2147483648u)); + code.add(to, u32(2147483648u)); } else { Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm(); Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -432,25 +432,25 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) { if (ctx.FPSCR_FTZ()) { DenormalsAreZero32(code, from, to); } - code->cvtss2sd(from, from); + code.cvtss2sd(from, from); ZeroIfNaN64(code, from, xmm_scratch); // Generate masks if out-of-signed-range - code->movaps(xmm_mask, code->MConst(f64_max_s32)); - code->cmpltsd(xmm_mask, from); - code->movd(gpr_mask, xmm_mask); - code->pand(xmm_mask, code->MConst(f64_min_s32)); - code->and_(gpr_mask, u32(2147483648u)); + code.movaps(xmm_mask, code.MConst(f64_max_s32)); + code.cmpltsd(xmm_mask, from); + code.movd(gpr_mask, xmm_mask); + code.pand(xmm_mask, code.MConst(f64_min_s32)); + code.and_(gpr_mask, u32(2147483648u)); // Bring into range if necessary - code->addsd(from, xmm_mask); + code.addsd(from, xmm_mask); // First time is to set flags - code->cvttsd2si(to, from); // 32 bit gpr + code.cvttsd2si(to, from); // 32 bit gpr // Clamp to output range - code->minsd(from, code->MConst(f64_max_s32)); - code->maxsd(from, code->MConst(f64_min_u32)); + code.minsd(from, code.MConst(f64_max_s32)); + code.maxsd(from, code.MConst(f64_min_u32)); // Actually convert - code->cvttsd2si(to, from); // 32 bit gpr + code.cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary - code->add(to, gpr_mask); + code.add(to, gpr_mask); } ctx.reg_alloc.DefineValue(inst, to); @@ -471,19 +471,19 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) { } // First time is to set flags if (round_towards_zero) { - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code.cvttsd2si(gpr_scratch, from); // 32 bit gpr } else { - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code.cvtsd2si(gpr_scratch, from); // 32 bit gpr } // Clamp to output range ZeroIfNaN64(code, from, xmm_scratch); - code->minsd(from, code->MConst(f64_max_s32)); - code->maxsd(from, code->MConst(f64_min_s32)); + code.minsd(from, code.MConst(f64_max_s32)); + code.maxsd(from, code.MConst(f64_min_s32)); // Second time is for real if (round_towards_zero) { - code->cvttsd2si(to, from); // 32 bit gpr + code.cvttsd2si(to, from); // 32 bit gpr } else { - code->cvtsd2si(to, from); // 32 bit gpr + code.cvtsd2si(to, from); // 32 bit gpr } ctx.reg_alloc.DefineValue(inst, to); @@ -507,16 +507,16 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { } ZeroIfNaN64(code, from, xmm_scratch); // Bring into SSE range - code->addsd(from, code->MConst(f64_min_s32)); + code.addsd(from, code.MConst(f64_min_s32)); // First time is to set flags - code->cvtsd2si(gpr_scratch, from); // 32 bit gpr + code.cvtsd2si(gpr_scratch, from); // 32 bit gpr // Clamp to output range - code->minsd(from, code->MConst(f64_max_s32)); - code->maxsd(from, code->MConst(f64_min_s32)); + code.minsd(from, code.MConst(f64_max_s32)); + 
code.maxsd(from, code.MConst(f64_min_s32)); // Actually convert - code->cvtsd2si(to, from); // 32 bit gpr + code.cvtsd2si(to, from); // 32 bit gpr // Bring back into original range - code->add(to, u32(2147483648u)); + code.add(to, u32(2147483648u)); } else { Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm(); Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32(); @@ -526,22 +526,22 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) { } ZeroIfNaN64(code, from, xmm_scratch); // Generate masks if out-of-signed-range - code->movaps(xmm_mask, code->MConst(f64_max_s32)); - code->cmpltsd(xmm_mask, from); - code->movd(gpr_mask, xmm_mask); - code->pand(xmm_mask, code->MConst(f64_min_s32)); - code->and_(gpr_mask, u32(2147483648u)); + code.movaps(xmm_mask, code.MConst(f64_max_s32)); + code.cmpltsd(xmm_mask, from); + code.movd(gpr_mask, xmm_mask); + code.pand(xmm_mask, code.MConst(f64_min_s32)); + code.and_(gpr_mask, u32(2147483648u)); // Bring into range if necessary - code->addsd(from, xmm_mask); + code.addsd(from, xmm_mask); // First time is to set flags - code->cvttsd2si(gpr_scratch, from); // 32 bit gpr + code.cvttsd2si(gpr_scratch, from); // 32 bit gpr // Clamp to output range - code->minsd(from, code->MConst(f64_max_s32)); - code->maxsd(from, code->MConst(f64_min_u32)); + code.minsd(from, code.MConst(f64_max_s32)); + code.maxsd(from, code.MConst(f64_min_u32)); // Actually convert - code->cvttsd2si(to, from); // 32 bit gpr + code.cvttsd2si(to, from); // 32 bit gpr // Bring back into original range if necessary - code->add(to, gpr_mask); + code.add(to, gpr_mask); } ctx.reg_alloc.DefineValue(inst, to); @@ -554,7 +554,7 @@ void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) { bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - code->cvtsi2ss(to, from); + code.cvtsi2ss(to, from); ctx.reg_alloc.DefineValue(inst, to); } @@ -567,8 +567,8 @@ void EmitX64::EmitFPU32ToSingle(EmitContext& ctx, IR::Inst* inst) { ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed - code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary - code->cvtsi2ss(to, from); + code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary + code.cvtsi2ss(to, from); ctx.reg_alloc.DefineValue(inst, to); } @@ -580,7 +580,7 @@ void EmitX64::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) { bool round_to_nearest = args[1].GetImmediateU1(); ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); - code->cvtsi2sd(to, from); + code.cvtsi2sd(to, from); ctx.reg_alloc.DefineValue(inst, to); } @@ -593,8 +593,8 @@ void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) { ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed - code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary - code->cvtsi2sd(to, from); + code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary + code.cvtsi2sd(to, from); ctx.reg_alloc.DefineValue(inst, to); } diff --git a/src/backend_x64/emit_x64_packed.cpp b/src/backend_x64/emit_x64_packed.cpp index 9aa4f427..4c5015f7 100644 --- a/src/backend_x64/emit_x64_packed.cpp +++ b/src/backend_x64/emit_x64_packed.cpp @@ -23,18 +23,18 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_a = 
ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - code->paddb(xmm_a, xmm_b); + code.paddb(xmm_a, xmm_b); if (ge_inst) { Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); - code->pcmpeqb(ones, ones); + code.pcmpeqb(ones, ones); - code->movdqa(xmm_ge, xmm_a); - code->pminub(xmm_ge, xmm_b); - code->pcmpeqb(xmm_ge, xmm_b); - code->pxor(xmm_ge, ones); + code.movdqa(xmm_ge, xmm_a); + code.pminub(xmm_ge, xmm_b); + code.pcmpeqb(xmm_ge, xmm_b); + code.pxor(xmm_ge, ones); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); @@ -54,18 +54,18 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code->pxor(xmm_ge, xmm_ge); - code->movdqa(saturated_sum, xmm_a); - code->paddsb(saturated_sum, xmm_b); - code->pcmpgtb(xmm_ge, saturated_sum); - code->pcmpeqb(saturated_sum, saturated_sum); - code->pxor(xmm_ge, saturated_sum); + code.pxor(xmm_ge, xmm_ge); + code.movdqa(saturated_sum, xmm_a); + code.paddsb(saturated_sum, xmm_b); + code.pcmpgtb(xmm_ge, saturated_sum); + code.pcmpeqb(saturated_sum, saturated_sum); + code.pxor(xmm_ge, saturated_sum); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); } - code->paddb(xmm_a, xmm_b); + code.paddb(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -77,19 +77,19 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - code->paddw(xmm_a, xmm_b); + code.paddw(xmm_a, xmm_b); if (ge_inst) { - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); - code->pcmpeqb(ones, ones); + code.pcmpeqb(ones, ones); - code->movdqa(xmm_ge, xmm_a); - code->pminuw(xmm_ge, xmm_b); - code->pcmpeqw(xmm_ge, xmm_b); - code->pxor(xmm_ge, ones); + code.movdqa(xmm_ge, xmm_a); + code.pminuw(xmm_ge, xmm_b); + code.pcmpeqw(xmm_ge, xmm_b); + code.pxor(xmm_ge, ones); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); @@ -98,11 +98,11 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(); // !(b <= a+b) == b > a+b - code->movdqa(tmp_a, xmm_a); - code->movdqa(tmp_b, xmm_b); - code->paddw(tmp_a, code->MConst(0x80008000)); - code->paddw(tmp_b, code->MConst(0x80008000)); - code->pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! + code.movdqa(tmp_a, xmm_a); + code.movdqa(tmp_b, xmm_b); + code.paddw(tmp_a, code.MConst(0x80008000)); + code.paddw(tmp_b, code.MConst(0x80008000)); + code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! 
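The fallback GE path above uses the classic bias trick: SSE2 only provides a signed packed compare (pcmpgtw), but adding 0x8000 to a 16-bit lane just flips its sign bit, so a signed greater-than on the biased lanes is an unsigned greater-than on the originals. Per lane, the emitted test amounts to the following scalar check (illustrative only, not part of the patch):

#include <cstdint>

// True when the 16-bit addition a + b carried out, which is when b > (a + b) mod 2^16
// as an unsigned comparison; this matches the "!(b <= a+b) == b > a+b" comment above.
bool PackedAddU16Carried(std::uint16_t a, std::uint16_t b) {
    const std::uint16_t sum = static_cast<std::uint16_t>(a + b);   // wraps like paddw
    const auto bias = [](std::uint16_t v) {
        return static_cast<std::int16_t>(static_cast<std::uint16_t>(v ^ 0x8000u));  // same effect as paddw 0x8000
    };
    return bias(b) > bias(sum);   // signed compare, mirroring pcmpgtw(tmp_b, tmp_a)
}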
ctx.reg_alloc.DefineValue(ge_inst, tmp_b); ctx.EraseInstruction(ge_inst); @@ -123,18 +123,18 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code->pxor(xmm_ge, xmm_ge); - code->movdqa(saturated_sum, xmm_a); - code->paddsw(saturated_sum, xmm_b); - code->pcmpgtw(xmm_ge, saturated_sum); - code->pcmpeqw(saturated_sum, saturated_sum); - code->pxor(xmm_ge, saturated_sum); + code.pxor(xmm_ge, xmm_ge); + code.movdqa(saturated_sum, xmm_a); + code.paddsw(saturated_sum, xmm_b); + code.pcmpgtw(xmm_ge, saturated_sum); + code.pcmpeqw(saturated_sum, saturated_sum); + code.pxor(xmm_ge, saturated_sum); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); } - code->paddw(xmm_a, xmm_b); + code.paddw(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -149,15 +149,15 @@ void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) { if (ge_inst) { Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code->movdqa(xmm_ge, xmm_a); - code->pmaxub(xmm_ge, xmm_b); - code->pcmpeqb(xmm_ge, xmm_a); + code.movdqa(xmm_ge, xmm_a); + code.pmaxub(xmm_ge, xmm_b); + code.pcmpeqb(xmm_ge, xmm_a); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); } - code->psubb(xmm_a, xmm_b); + code.psubb(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -173,18 +173,18 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code->pxor(xmm_ge, xmm_ge); - code->movdqa(saturated_sum, xmm_a); - code->psubsb(saturated_sum, xmm_b); - code->pcmpgtb(xmm_ge, saturated_sum); - code->pcmpeqb(saturated_sum, saturated_sum); - code->pxor(xmm_ge, saturated_sum); + code.pxor(xmm_ge, xmm_ge); + code.movdqa(saturated_sum, xmm_a); + code.psubsb(saturated_sum, xmm_b); + code.pcmpgtb(xmm_ge, saturated_sum); + code.pcmpeqb(saturated_sum, saturated_sum); + code.pxor(xmm_ge, saturated_sum); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); } - code->psubb(xmm_a, xmm_b); + code.psubb(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -197,22 +197,22 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - code->psubw(xmm_a, xmm_b); + code.psubw(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); return; } - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code->movdqa(xmm_ge, xmm_a); - code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1 - code->pcmpeqw(xmm_ge, xmm_a); + code.movdqa(xmm_ge, xmm_a); + code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1 + code.pcmpeqw(xmm_ge, xmm_a); - code->psubw(xmm_a, xmm_b); + code.psubw(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); @@ -226,14 +226,14 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); // (a >= b) == !(b > a) - code->pcmpeqb(ones, ones); - code->paddw(xmm_a, code->MConst(0x80008000)); - code->paddw(xmm_b, code->MConst(0x80008000)); - code->movdqa(xmm_ge, xmm_b); - code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! 
- code->pxor(xmm_ge, ones); + code.pcmpeqb(ones, ones); + code.paddw(xmm_a, code.MConst(0x80008000)); + code.paddw(xmm_b, code.MConst(0x80008000)); + code.movdqa(xmm_ge, xmm_b); + code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison! + code.pxor(xmm_ge, ones); - code->psubw(xmm_a, xmm_b); + code.psubw(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); @@ -251,18 +251,18 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code->pxor(xmm_ge, xmm_ge); - code->movdqa(saturated_diff, xmm_a); - code->psubsw(saturated_diff, xmm_b); - code->pcmpgtw(xmm_ge, saturated_diff); - code->pcmpeqw(saturated_diff, saturated_diff); - code->pxor(xmm_ge, saturated_diff); + code.pxor(xmm_ge, xmm_ge); + code.movdqa(saturated_diff, xmm_a); + code.psubsw(saturated_diff, xmm_b); + code.pcmpgtw(xmm_ge, saturated_diff); + code.pcmpeqw(saturated_diff, saturated_diff); + code.pxor(xmm_ge, saturated_diff); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); } - code->psubw(xmm_a, xmm_b); + code.psubw(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -280,11 +280,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { // Therefore, // ~pavg(~a, ~b) == (a + b) >> 1 - code->pcmpeqb(ones, ones); - code->pxor(xmm_a, ones); - code->pxor(xmm_b, ones); - code->pavgb(xmm_a, xmm_b); - code->pxor(xmm_a, ones); + code.pcmpeqb(ones, ones); + code.pxor(xmm_a, ones); + code.pxor(xmm_b, ones); + code.pavgb(xmm_a, xmm_b); + code.pxor(xmm_a, ones); ctx.reg_alloc.DefineValue(inst, xmm_a); } else { @@ -299,12 +299,12 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1). // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below. - code->mov(xor_a_b, reg_a); - code->and_(and_a_b, reg_b); - code->xor_(xor_a_b, reg_b); - code->shr(xor_a_b, 1); - code->and_(xor_a_b, 0x7F7F7F7F); - code->add(result, xor_a_b); + code.mov(xor_a_b, reg_a); + code.and_(and_a_b, reg_b); + code.xor_(xor_a_b, reg_b); + code.shr(xor_a_b, 1); + code.and_(xor_a_b, 0x7F7F7F7F); + code.add(result, xor_a_b); ctx.reg_alloc.DefineValue(inst, result); } @@ -318,11 +318,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->movdqa(tmp, xmm_a); - code->pand(xmm_a, xmm_b); - code->pxor(tmp, xmm_b); - code->psrlw(tmp, 1); - code->paddw(xmm_a, tmp); + code.movdqa(tmp, xmm_a); + code.pand(xmm_a, xmm_b); + code.pxor(tmp, xmm_b); + code.psrlw(tmp, 1); + code.paddw(xmm_a, tmp); ctx.reg_alloc.DefineValue(inst, xmm_a); } else { @@ -337,12 +337,12 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1). // We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below. 
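Both halving-add variants above rely on the same identity: x + y == 2*(x & y) + (x ^ y), so the truncated average (x + y) >> 1 can be formed as (x & y) + ((x ^ y) >> 1) without the intermediate sum ever overflowing a lane. A one-lane scalar version (illustrative only, not part of the patch):

#include <cstdint>

std::uint16_t HalvingAddU16(std::uint16_t x, std::uint16_t y) {
    // Bits common to both operands count in full; differing bits contribute half each.
    return static_cast<std::uint16_t>((x & y) + ((x ^ y) >> 1));
}

// e.g. HalvingAddU16(0xFFFF, 0xFFFF) == 0xFFFF and HalvingAddU16(3, 4) == 3,
// with every intermediate value still fitting in 16 bits.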
- code->mov(xor_a_b, reg_a); - code->and_(and_a_b, reg_b); - code->xor_(xor_a_b, reg_b); - code->shr(xor_a_b, 1); - code->and_(xor_a_b, 0x7FFF7FFF); - code->add(result, xor_a_b); + code.mov(xor_a_b, reg_a); + code.and_(and_a_b, reg_b); + code.xor_(xor_a_b, reg_b); + code.shr(xor_a_b, 1); + code.and_(xor_a_b, 0x7FFF7FFF); + code.add(result, xor_a_b); ctx.reg_alloc.DefineValue(inst, result); } @@ -364,15 +364,15 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) { // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below. // carry propagates the sign bit from (x^y)>>1 upwards by one. - code->mov(xor_a_b, reg_a); - code->and_(and_a_b, reg_b); - code->xor_(xor_a_b, reg_b); - code->mov(carry, xor_a_b); - code->and_(carry, 0x80808080); - code->shr(xor_a_b, 1); - code->and_(xor_a_b, 0x7F7F7F7F); - code->add(result, xor_a_b); - code->xor_(result, carry); + code.mov(xor_a_b, reg_a); + code.and_(and_a_b, reg_b); + code.xor_(xor_a_b, reg_b); + code.mov(carry, xor_a_b); + code.and_(carry, 0x80808080); + code.shr(xor_a_b, 1); + code.and_(xor_a_b, 0x7F7F7F7F); + code.add(result, xor_a_b); + code.xor_(result, carry); ctx.reg_alloc.DefineValue(inst, result); } @@ -389,11 +389,11 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) { // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>>1). // The arithmetic shift right makes this signed. - code->movdqa(tmp, xmm_a); - code->pand(xmm_a, xmm_b); - code->pxor(tmp, xmm_b); - code->psraw(tmp, 1); - code->paddw(xmm_a, tmp); + code.movdqa(tmp, xmm_a); + code.pand(xmm_a, xmm_b); + code.pxor(tmp, xmm_b); + code.psraw(tmp, 1); + code.paddw(xmm_a, tmp); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -408,9 +408,9 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { // Note that x^y always contains the LSB of the result. // Since we want to calculate (x+y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y). - code->xor_(minuend, subtrahend); - code->and_(subtrahend, minuend); - code->shr(minuend, 1); + code.xor_(minuend, subtrahend); + code.and_(subtrahend, minuend); + code.shr(minuend, 1); // At this point, // minuend := (a^b) >> 1 @@ -420,9 +420,9 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { // We can do this because minuend contains 7 bit fields. // We use the extra bit in minuend as a bit to borrow from; we set this bit. // We invert this bit at the end as this tells us if that bit was borrowed from. - code->or_(minuend, 0x80808080); - code->sub(minuend, subtrahend); - code->xor_(minuend, 0x80808080); + code.or_(minuend, 0x80808080); + code.sub(minuend, subtrahend); + code.xor_(minuend, 0x80808080); // minuend now contains the desired result. ctx.reg_alloc.DefineValue(inst, minuend); @@ -440,11 +440,11 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { // Note that x^y always contains the LSB of the result. // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y). 
- code->xor_(minuend, subtrahend); - code->and_(subtrahend, minuend); - code->mov(carry, minuend); - code->and_(carry, 0x80808080); - code->shr(minuend, 1); + code.xor_(minuend, subtrahend); + code.and_(subtrahend, minuend); + code.mov(carry, minuend); + code.and_(carry, 0x80808080); + code.shr(minuend, 1); // At this point, // minuend := (a^b) >> 1 @@ -456,10 +456,10 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { // We use the extra bit in minuend as a bit to borrow from; we set this bit. // We invert this bit at the end as this tells us if that bit was borrowed from. // We then sign extend the result into this bit. - code->or_(minuend, 0x80808080); - code->sub(minuend, subtrahend); - code->xor_(minuend, 0x80808080); - code->xor_(minuend, carry); + code.or_(minuend, 0x80808080); + code.sub(minuend, subtrahend); + code.xor_(minuend, 0x80808080); + code.xor_(minuend, carry); ctx.reg_alloc.DefineValue(inst, minuend); } @@ -474,15 +474,15 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) { // Note that x^y always contains the LSB of the result. // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y). - code->pxor(minuend, subtrahend); - code->pand(subtrahend, minuend); - code->psrlw(minuend, 1); + code.pxor(minuend, subtrahend); + code.pand(subtrahend, minuend); + code.psrlw(minuend, 1); // At this point, // minuend := (a^b) >> 1 // subtrahend := (a^b) & b - code->psubw(minuend, subtrahend); + code.psubw(minuend, subtrahend); ctx.reg_alloc.DefineValue(inst, minuend); } @@ -497,20 +497,20 @@ void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) { // Note that x^y always contains the LSB of the result. // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>>1) - ((x^y)&y). 
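The halving subtracts above rest on the matching identity x - y == (x ^ y) - 2*((x ^ y) & y), so with d = x ^ y the truncated half-difference (x - y) >> 1 equals (d >> 1) - (d & y). A one-lane scalar version (illustrative only, not part of the patch):

#include <cstdint>

std::uint16_t HalvingSubU16(std::uint16_t x, std::uint16_t y) {
    const std::uint16_t d = x ^ y;
    return static_cast<std::uint16_t>((d >> 1) - (d & y));   // wraps per lane, like psubw
}

// e.g. HalvingSubU16(7, 3) == 2, and HalvingSubU16(0, 1) == 0xFFFF,
// i.e. bits <16:1> of the signed difference -1.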
- code->pxor(minuend, subtrahend); - code->pand(subtrahend, minuend); - code->psraw(minuend, 1); + code.pxor(minuend, subtrahend); + code.pand(subtrahend, minuend); + code.psraw(minuend, 1); // At this point, // minuend := (a^b) >>> 1 // subtrahend := (a^b) & b - code->psubw(minuend, subtrahend); + code.psubw(minuend, subtrahend); ctx.reg_alloc.DefineValue(inst, minuend); } -void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) { +void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); @@ -521,25 +521,25 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool Xbyak::Reg32 reg_sum, reg_diff; if (is_signed) { - code->movsx(reg_a_lo, reg_a_hi.cvt16()); - code->movsx(reg_b_lo, reg_b_hi.cvt16()); - code->sar(reg_a_hi, 16); - code->sar(reg_b_hi, 16); + code.movsx(reg_a_lo, reg_a_hi.cvt16()); + code.movsx(reg_b_lo, reg_b_hi.cvt16()); + code.sar(reg_a_hi, 16); + code.sar(reg_b_hi, 16); } else { - code->movzx(reg_a_lo, reg_a_hi.cvt16()); - code->movzx(reg_b_lo, reg_b_hi.cvt16()); - code->shr(reg_a_hi, 16); - code->shr(reg_b_hi, 16); + code.movzx(reg_a_lo, reg_a_hi.cvt16()); + code.movzx(reg_b_lo, reg_b_hi.cvt16()); + code.shr(reg_a_hi, 16); + code.shr(reg_b_hi, 16); } if (hi_is_sum) { - code->sub(reg_a_lo, reg_b_hi); - code->add(reg_a_hi, reg_b_lo); + code.sub(reg_a_lo, reg_b_hi); + code.add(reg_a_hi, reg_b_lo); reg_diff = reg_a_lo; reg_sum = reg_a_hi; } else { - code->add(reg_a_lo, reg_b_hi); - code->sub(reg_a_hi, reg_b_lo); + code.add(reg_a_lo, reg_b_hi); + code.sub(reg_a_hi, reg_b_lo); reg_diff = reg_a_hi; reg_sum = reg_a_lo; } @@ -549,36 +549,36 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool Xbyak::Reg32 ge_sum = reg_b_hi; Xbyak::Reg32 ge_diff = reg_b_lo; - code->mov(ge_sum, reg_sum); - code->mov(ge_diff, reg_diff); + code.mov(ge_sum, reg_sum); + code.mov(ge_diff, reg_diff); if (!is_signed) { - code->shl(ge_sum, 15); - code->sar(ge_sum, 31); + code.shl(ge_sum, 15); + code.sar(ge_sum, 31); } else { - code->not_(ge_sum); - code->sar(ge_sum, 31); + code.not_(ge_sum); + code.sar(ge_sum, 31); } - code->not_(ge_diff); - code->sar(ge_diff, 31); - code->and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF); - code->and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000); - code->or_(ge_sum, ge_diff); + code.not_(ge_diff); + code.sar(ge_diff, 31); + code.and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF); + code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000); + code.or_(ge_sum, ge_diff); ctx.reg_alloc.DefineValue(ge_inst, ge_sum); ctx.EraseInstruction(ge_inst); } if (is_halving) { - code->shl(reg_a_lo, 15); - code->shr(reg_a_hi, 1); + code.shl(reg_a_lo, 15); + code.shr(reg_a_hi, 1); } else { - code->shl(reg_a_lo, 16); + code.shl(reg_a_lo, 16); } // reg_a_lo now contains the low word and reg_a_hi now contains the high word. // Merge them. 
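EmitPackedSubAdd above serves the whole ASX/SAX family, so it is parameterised on which halfword receives the sum, whether the halfwords are extended as signed, and whether the results are halved. A rough scalar model of the value it assembles (illustrative only, not part of the patch; the GE computation is left out):

#include <cstdint>

std::uint32_t PackedSubAdd(std::uint32_t a, std::uint32_t b,
                           bool hi_is_sum, bool is_signed, bool is_halving) {
    const auto lo16 = [&](std::uint32_t v) -> std::int32_t {
        return is_signed ? static_cast<std::int16_t>(v & 0xFFFF)    // movsx
                         : static_cast<std::int32_t>(v & 0xFFFF);   // movzx
    };
    const auto hi16 = [&](std::uint32_t v) -> std::int32_t {
        return is_signed ? static_cast<std::int16_t>(v >> 16)       // sar by 16
                         : static_cast<std::int32_t>(v >> 16);      // shr by 16
    };

    std::int32_t sum  = hi_is_sum ? hi16(a) + lo16(b) : lo16(a) + hi16(b);
    std::int32_t diff = hi_is_sum ? lo16(a) - hi16(b) : hi16(a) - lo16(b);
    if (is_halving) {
        sum >>= 1;                                                  // the shr/shl-by-15 pair
        diff >>= 1;
    }

    const std::uint32_t hi = static_cast<std::uint32_t>(hi_is_sum ? sum : diff) & 0xFFFF;
    const std::uint32_t lo = static_cast<std::uint32_t>(hi_is_sum ? diff : sum) & 0xFFFF;
    return (hi << 16) | lo;                                         // what the final shld merges
}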
- code->shld(reg_a_hi, reg_a_lo, 16); + code.shld(reg_a_hi, reg_a_lo, 16); ctx.reg_alloc.DefineValue(inst, reg_a_hi); } @@ -615,13 +615,13 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) { EmitPackedSubAdd(code, ctx, inst, false, true, true); } -static void EmitPackedOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { +static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - (code->*fn)(xmm_a, xmm_b); + (code.*fn)(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -672,19 +672,19 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]); - code->pand(from, ge); - code->pandn(ge, to); - code->por(from, ge); + code.pand(from, ge); + code.pandn(ge, to); + code.por(from, ge); ctx.reg_alloc.DefineValue(inst, from); - } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) { + } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) { Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32(); Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); - code->and_(from, ge); - code->andn(to, ge, to); - code->or_(from, to); + code.and_(from, ge); + code.andn(to, ge, to); + code.or_(from, to); ctx.reg_alloc.DefineValue(inst, from); } else { @@ -692,10 +692,10 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); - code->and_(from, ge); - code->not_(ge); - code->and_(ge, to); - code->or_(from, ge); + code.and_(from, ge); + code.not_(ge); + code.and_(ge, to); + code.or_(from, ge); ctx.reg_alloc.DefineValue(inst, from); } diff --git a/src/backend_x64/emit_x64_saturation.cpp b/src/backend_x64/emit_x64_saturation.cpp index 865ae643..5c4483ad 100644 --- a/src/backend_x64/emit_x64_saturation.cpp +++ b/src/backend_x64/emit_x64_saturation.cpp @@ -26,15 +26,15 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(overflow, result); - code->shr(overflow, 31); - code->add(overflow, 0x7FFFFFFF); + code.mov(overflow, result); + code.shr(overflow, 31); + code.add(overflow, 0x7FFFFFFF); // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative - code->add(result, addend); - code->cmovo(result, overflow); + code.add(result, addend); + code.cmovo(result, overflow); if (overflow_inst) { - code->seto(overflow.cvt8()); + code.seto(overflow.cvt8()); ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.EraseInstruction(overflow_inst); @@ -52,15 +52,15 @@ void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32(); Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); - code->mov(overflow, result); - code->shr(overflow, 31); - code->add(overflow, 0x7FFFFFFF); + code.mov(overflow, result); + code.shr(overflow, 31); + code.add(overflow, 0x7FFFFFFF); // 
overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative - code->sub(result, subend); - code->cmovo(result, overflow); + code.sub(result, subend); + code.cmovo(result, overflow); if (overflow_inst) { - code->seto(overflow.cvt8()); + code.seto(overflow.cvt8()); ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.EraseInstruction(overflow_inst); @@ -83,14 +83,14 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); // Pseudocode: result = clamp(reg_a, 0, saturated_value); - code->xor_(overflow, overflow); - code->cmp(reg_a, saturated_value); - code->mov(result, saturated_value); - code->cmovle(result, overflow); - code->cmovbe(result, reg_a); + code.xor_(overflow, overflow); + code.cmp(reg_a, saturated_value); + code.mov(result, saturated_value); + code.cmovle(result, overflow); + code.cmovbe(result, reg_a); if (overflow_inst) { - code->seta(overflow.cvt8()); + code.seta(overflow.cvt8()); ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.EraseInstruction(overflow_inst); @@ -126,20 +126,20 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value. - code->lea(overflow, code->ptr[reg_a.cvt64() + negative_saturated_value]); + code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]); // Put the appropriate saturated value in result - code->cmp(reg_a, positive_saturated_value); - code->mov(tmp, positive_saturated_value); - code->mov(result, sext_negative_satured_value); - code->cmovg(result, tmp); + code.cmp(reg_a, positive_saturated_value); + code.mov(tmp, positive_saturated_value); + code.mov(result, sext_negative_satured_value); + code.cmovg(result, tmp); // Do the saturation - code->cmp(overflow, mask); - code->cmovbe(result, reg_a); + code.cmp(overflow, mask); + code.cmovbe(result, reg_a); if (overflow_inst) { - code->seta(overflow.cvt8()); + code.seta(overflow.cvt8()); ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.EraseInstruction(overflow_inst); diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index 455d0296..12b78746 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -17,13 +17,13 @@ namespace Dynarmic::BackendX64 { using namespace Xbyak::util; template -static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, Function fn) { +static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - (code->*fn)(xmm_a, xmm_b); + (code.*fn)(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -35,15 +35,15 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32(); - code->pextrb(dest, source, index); + code.pextrb(dest, source, index); ctx.reg_alloc.DefineValue(inst, dest); } else { Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32(); - code->pextrw(dest, source, index / 2); + code.pextrw(dest, source, index / 2); if (index % 2 == 1) { - 
code->shr(dest, 8); + code.shr(dest, 8); } ctx.reg_alloc.DefineValue(inst, dest); } @@ -56,7 +56,7 @@ void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32(); - code->pextrw(dest, source, index); + code.pextrw(dest, source, index); ctx.reg_alloc.DefineValue(inst, dest); } @@ -69,14 +69,14 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) { if (index == 0) { Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); - code->movd(dest, source); - } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + code.movd(dest, source); + } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); - code->pextrd(dest, source, index); + code.pextrd(dest, source, index); } else { Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pshufd(source, source, index); - code->movd(dest, source); + code.pshufd(source, source, index); + code.movd(dest, source); } ctx.reg_alloc.DefineValue(inst, dest); @@ -91,14 +91,14 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { if (index == 0) { Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); - code->movq(dest, source); - } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + code.movq(dest, source); + } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); - code->pextrq(dest, source, 1); + code.pextrq(dest, source, 1); } else { Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]); - code->punpckhqdq(source, source); - code->movq(dest, source); + code.punpckhqdq(source, source); + code.movq(dest, source); } ctx.reg_alloc.DefineValue(inst, dest); @@ -109,11 +109,11 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); u8 index = args[1].GetImmediateU8(); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt8(); - code->pinsrb(source_vector, source_elem.cvt32(), index); + code.pinsrb(source_vector, source_elem.cvt32(), index); ctx.reg_alloc.DefineValue(inst, source_vector); } else { @@ -121,17 +121,17 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - code->pextrw(tmp, source_vector, index / 2); + code.pextrw(tmp, source_vector, index / 2); if (index % 2 == 0) { - code->and_(tmp, 0xFF00); - code->and_(source_elem, 0x00FF); - code->or_(tmp, source_elem); + code.and_(tmp, 0xFF00); + code.and_(source_elem, 0x00FF); + code.or_(tmp, source_elem); } else { - code->and_(tmp, 0x00FF); - code->shl(source_elem, 8); - code->or_(tmp, source_elem); + code.and_(tmp, 0x00FF); + code.shl(source_elem, 8); + code.or_(tmp, source_elem); } - code->pinsrw(source_vector, tmp, index / 2); + code.pinsrw(source_vector, tmp, index / 2); ctx.reg_alloc.DefineValue(inst, source_vector); } @@ -145,7 +145,7 @@ void EmitX64::EmitVectorSetElement16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt16(); - code->pinsrw(source_vector, source_elem.cvt32(), index); + code.pinsrw(source_vector, source_elem.cvt32(), 
index); ctx.reg_alloc.DefineValue(inst, source_vector); } @@ -155,20 +155,20 @@ void EmitX64::EmitVectorSetElement32(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); u8 index = args[1].GetImmediateU8(); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt32(); - code->pinsrd(source_vector, source_elem, index); + code.pinsrd(source_vector, source_elem, index); ctx.reg_alloc.DefineValue(inst, source_vector); } else { Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); - code->pinsrw(source_vector, source_elem, index * 2); - code->shr(source_elem, 16); - code->pinsrw(source_vector, source_elem, index * 2 + 1); + code.pinsrw(source_vector, source_elem, index * 2); + code.shr(source_elem, 16); + code.pinsrw(source_vector, source_elem, index * 2 + 1); ctx.reg_alloc.DefineValue(inst, source_vector); } @@ -179,24 +179,24 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); u8 index = args[1].GetImmediateU8(); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(args[2]); - code->pinsrq(source_vector, source_elem, index); + code.pinsrq(source_vector, source_elem, index); ctx.reg_alloc.DefineValue(inst, source_vector); } else { Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Reg64 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]); - code->pinsrw(source_vector, source_elem.cvt32(), index * 4); - code->shr(source_elem, 16); - code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 1); - code->shr(source_elem, 16); - code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 2); - code->shr(source_elem, 16); - code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 3); + code.pinsrw(source_vector, source_elem.cvt32(), index * 4); + code.shr(source_elem, 16); + code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 1); + code.shr(source_elem, 16); + code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 2); + code.shr(source_elem, 16); + code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 3); ctx.reg_alloc.DefineValue(inst, source_vector); } @@ -227,15 +227,15 @@ void EmitX64::EmitVectorLowerBroadcast8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->pxor(tmp, tmp); - code->pshufb(a, tmp); - code->movq(a, a); + code.pxor(tmp, tmp); + code.pshufb(a, tmp); + code.movq(a, a); } else { - code->punpcklbw(a, a); - code->pshuflw(a, a, 0); + code.punpcklbw(a, a); + code.pshuflw(a, a, 0); } ctx.reg_alloc.DefineValue(inst, a); @@ -246,7 +246,7 @@ void EmitX64::EmitVectorLowerBroadcast16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pshuflw(a, a, 0); + code.pshuflw(a, a, 0); ctx.reg_alloc.DefineValue(inst, a); } @@ -256,7 +256,7 @@ void EmitX64::EmitVectorLowerBroadcast32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pshuflw(a, a, 0b01000100); + 
code.pshuflw(a, a, 0b01000100); ctx.reg_alloc.DefineValue(inst, a); } @@ -266,15 +266,15 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->pxor(tmp, tmp); - code->pshufb(a, tmp); + code.pxor(tmp, tmp); + code.pshufb(a, tmp); } else { - code->punpcklbw(a, a); - code->pshuflw(a, a, 0); - code->punpcklqdq(a, a); + code.punpcklbw(a, a); + code.pshuflw(a, a, 0); + code.punpcklqdq(a, a); } ctx.reg_alloc.DefineValue(inst, a); @@ -285,8 +285,8 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pshuflw(a, a, 0); - code->punpcklqdq(a, a); + code.pshuflw(a, a, 0); + code.punpcklqdq(a, a); ctx.reg_alloc.DefineValue(inst, a); } @@ -296,7 +296,7 @@ void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code->pshufd(a, a, 0); + code.pshufd(a, a, 0); ctx.reg_alloc.DefineValue(inst, a); } @@ -306,7 +306,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code->punpcklqdq(a, a); + code.punpcklqdq(a, a); ctx.reg_alloc.DefineValue(inst, a); } @@ -325,8 +325,8 @@ void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm(); - code->pcmpeqw(xmm_b, xmm_b); - code->pxor(xmm_a, xmm_b); + code.pcmpeqw(xmm_b, xmm_b); + code.pxor(xmm_a, xmm_b); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -344,7 +344,7 @@ void EmitX64::EmitVectorEqual32(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) { - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpeqq); return; } @@ -355,9 +355,9 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->pcmpeqd(xmm_a, xmm_b); - code->pshufd(tmp, xmm_a, 0b10110001); - code->pand(xmm_a, tmp); + code.pcmpeqd(xmm_a, xmm_b); + code.pshufd(tmp, xmm_a, 0b10110001); + code.pand(xmm_a, tmp); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -365,14 +365,14 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->pcmpeqq(xmm_a, xmm_b); - code->pshufd(tmp, xmm_a, 0b01001110); - code->pand(xmm_a, tmp); + code.pcmpeqq(xmm_a, xmm_b); + code.pshufd(tmp, xmm_a, 0b01001110); + code.pand(xmm_a, tmp); ctx.reg_alloc.DefineValue(inst, xmm_a); } else { @@ -380,11 +380,11 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->pcmpeqd(xmm_a, xmm_b); - code->pshufd(tmp, xmm_a, 0b10110001); - code->pand(xmm_a, tmp); - code->pshufd(tmp, xmm_a, 
0b01001110); - code->pand(xmm_a, tmp); + code.pcmpeqd(xmm_a, xmm_b); + code.pshufd(tmp, xmm_a, 0b10110001); + code.pand(xmm_a, tmp); + code.pshufd(tmp, xmm_a, 0b01001110); + code.pand(xmm_a, tmp); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -397,13 +397,13 @@ void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->punpcklqdq(xmm_a, xmm_b); - code->movdqa(tmp, xmm_a); - code->psllw(xmm_a, 8); - code->paddw(xmm_a, tmp); - code->pxor(tmp, tmp); - code->psrlw(xmm_a, 8); - code->packuswb(xmm_a, tmp); + code.punpcklqdq(xmm_a, xmm_b); + code.movdqa(tmp, xmm_a); + code.psllw(xmm_a, 8); + code.paddw(xmm_a, tmp); + code.pxor(tmp, tmp); + code.psrlw(xmm_a, 8); + code.packuswb(xmm_a, tmp); ctx.reg_alloc.DefineValue(inst, xmm_a); } @@ -415,17 +415,17 @@ void EmitX64::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->punpcklqdq(xmm_a, xmm_b); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { - code->pxor(tmp, tmp); - code->phaddw(xmm_a, tmp); + code.punpcklqdq(xmm_a, xmm_b); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + code.pxor(tmp, tmp); + code.phaddw(xmm_a, tmp); } else { - code->movdqa(tmp, xmm_a); - code->pslld(xmm_a, 16); - code->paddd(xmm_a, tmp); - code->pxor(tmp, tmp); - code->psrad(xmm_a, 16); - code->packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above. + code.movdqa(tmp, xmm_a); + code.pslld(xmm_a, 16); + code.paddd(xmm_a, tmp); + code.pxor(tmp, tmp); + code.psrad(xmm_a, 16); + code.packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above. } ctx.reg_alloc.DefineValue(inst, xmm_a); @@ -438,16 +438,16 @@ void EmitX64::EmitVectorLowerPairedAdd32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - code->punpcklqdq(xmm_a, xmm_b); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { - code->pxor(tmp, tmp); - code->phaddd(xmm_a, tmp); + code.punpcklqdq(xmm_a, xmm_b); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + code.pxor(tmp, tmp); + code.phaddd(xmm_a, tmp); } else { - code->movdqa(tmp, xmm_a); - code->psllq(xmm_a, 32); - code->paddq(xmm_a, tmp); - code->psrlq(xmm_a, 32); - code->pshufd(xmm_a, xmm_a, 0b11011000); + code.movdqa(tmp, xmm_a); + code.psllq(xmm_a, 32); + code.paddq(xmm_a, tmp); + code.psrlq(xmm_a, 32); + code.pshufd(xmm_a, xmm_a, 0b11011000); } ctx.reg_alloc.DefineValue(inst, xmm_a); @@ -461,15 +461,15 @@ void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(); - code->movdqa(c, a); - code->movdqa(d, b); - code->psllw(a, 8); - code->psllw(b, 8); - code->paddw(a, c); - code->paddw(b, d); - code->psrlw(a, 8); - code->psrlw(b, 8); - code->packuswb(a, b); + code.movdqa(c, a); + code.movdqa(d, b); + code.psllw(a, 8); + code.psllw(b, 8); + code.paddw(a, c); + code.paddw(b, d); + code.psrlw(a, 8); + code.psrlw(b, 8); + code.packuswb(a, b); ctx.reg_alloc.DefineValue(inst, a); } @@ -477,11 +477,11 @@ void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + if 
(code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); - code->phaddw(a, b); + code.phaddw(a, b); ctx.reg_alloc.DefineValue(inst, a); } else { @@ -490,15 +490,15 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(); - code->movdqa(c, a); - code->movdqa(d, b); - code->pslld(a, 16); - code->pslld(b, 16); - code->paddd(a, c); - code->paddd(b, d); - code->psrad(a, 16); - code->psrad(b, 16); - code->packssdw(a, b); + code.movdqa(c, a); + code.movdqa(d, b); + code.pslld(a, 16); + code.pslld(b, 16); + code.paddd(a, c); + code.paddd(b, d); + code.psrad(a, 16); + code.psrad(b, 16); + code.packssdw(a, b); ctx.reg_alloc.DefineValue(inst, a); } @@ -507,11 +507,11 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); - code->phaddd(a, b); + code.phaddd(a, b); ctx.reg_alloc.DefineValue(inst, a); } else { @@ -520,13 +520,13 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(); - code->movdqa(c, a); - code->movdqa(d, b); - code->psllq(a, 32); - code->psllq(b, 32); - code->paddq(a, c); - code->paddq(b, d); - code->shufps(a, b, 0b11011101); + code.movdqa(c, a); + code.movdqa(d, b); + code.psllq(a, 32); + code.psllq(b, 32); + code.paddq(a, c); + code.paddq(b, d); + code.shufps(a, b, 0b11011101); ctx.reg_alloc.DefineValue(inst, a); } @@ -539,10 +539,10 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); - code->movdqa(c, a); - code->punpcklqdq(a, b); - code->punpckhqdq(c, b); - code->paddq(a, c); + code.movdqa(c, a); + code.punpcklqdq(a, b); + code.punpckhqdq(c, b); + code.paddq(a, c); ctx.reg_alloc.DefineValue(inst, a); } @@ -552,7 +552,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code->movq(a, a); // TODO: !IsLastUse + code.movq(a, a); // TODO: !IsLastUse ctx.reg_alloc.DefineValue(inst, a); } diff --git a/src/backend_x64/exception_handler_generic.cpp b/src/backend_x64/exception_handler_generic.cpp index 155ff506..5f6904b5 100644 --- a/src/backend_x64/exception_handler_generic.cpp +++ b/src/backend_x64/exception_handler_generic.cpp @@ -14,7 +14,7 @@ struct BlockOfCode::ExceptionHandler::Impl final { BlockOfCode::ExceptionHandler::ExceptionHandler() = default; BlockOfCode::ExceptionHandler::~ExceptionHandler() = default; -void BlockOfCode::ExceptionHandler::Register(BlockOfCode*) { +void BlockOfCode::ExceptionHandler::Register(BlockOfCode&) { // Do nothing } diff --git a/src/backend_x64/exception_handler_windows.cpp b/src/backend_x64/exception_handler_windows.cpp index 1511f186..c6871f88 100644 --- a/src/backend_x64/exception_handler_windows.cpp +++ b/src/backend_x64/exception_handler_windows.cpp @@ -173,11 +173,11 @@ private: BlockOfCode::ExceptionHandler::ExceptionHandler() = default; BlockOfCode::ExceptionHandler::~ExceptionHandler() = default; -void 
BlockOfCode::ExceptionHandler::Register(BlockOfCode* code) { +void BlockOfCode::ExceptionHandler::Register(BlockOfCode& code) { const auto prolog_info = GetPrologueInformation(); - code->align(16); - UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code->AllocateFromCodeSpace(sizeof(UNWIND_INFO))); + code.align(16); + UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO))); unwind_info->Version = 1; unwind_info->Flags = 0; // No special exception handling required. unwind_info->SizeOfProlog = prolog_info.prolog_size; @@ -186,16 +186,16 @@ void BlockOfCode::ExceptionHandler::Register(BlockOfCode* code) { unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0 // UNWIND_INFO::UnwindCode field: const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size(); - UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code->AllocateFromCodeSpace(size_of_unwind_code)); + UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code)); memcpy(unwind_code, prolog_info.unwind_code.data(), size_of_unwind_code); - code->align(16); - RUNTIME_FUNCTION* rfuncs = static_cast<RUNTIME_FUNCTION*>(code->AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION))); - rfuncs->BeginAddress = static_cast<DWORD>(reinterpret_cast(code->run_code) - code->getCode()); - rfuncs->EndAddress = static_cast<DWORD>(code->maxSize_); - rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast(unwind_info) - code->getCode()); + code.align(16); + RUNTIME_FUNCTION* rfuncs = static_cast<RUNTIME_FUNCTION*>(code.AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION))); + rfuncs->BeginAddress = static_cast<DWORD>(reinterpret_cast(code.run_code) - code.getCode()); + rfuncs->EndAddress = static_cast<DWORD>(code.maxSize_); + rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast(unwind_info) - code.getCode()); - impl = std::make_unique<Impl>(rfuncs, code->getCode()); + impl = std::make_unique<Impl>(rfuncs, code.getCode()); } } // namespace BackendX64 diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 9718c3c6..ac891d47 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -370,10 +370,10 @@ void RegAlloc::HostCall(IR::Inst* result_def, boost::optional arg0, b Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); switch (args[i]->GetType()) { case IR::Type::U8: - code->movzx(reg.cvt32(), reg.cvt8()); + code.movzx(reg.cvt32(), reg.cvt8()); break; case IR::Type::U16: - code->movzx(reg.cvt32(), reg.cvt16()); + code.movzx(reg.cvt32(), reg.cvt16()); break; default: break; // Nothing needs to be done @@ -459,9 +459,9 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) { Xbyak::Reg64 reg = HostLocToReg64(host_loc); u64 imm_value = ImmediateToU64(imm); if (imm_value == 0) - code->xor_(reg.cvt32(), reg.cvt32()); + code.xor_(reg.cvt32(), reg.cvt32()); else - code->mov(reg, imm_value); + code.mov(reg, imm_value); return host_loc; } @@ -469,9 +469,9 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) { Xbyak::Xmm reg = HostLocToXmm(host_loc); u64 imm_value = ImmediateToU64(imm); if (imm_value == 0) - code->pxor(reg, reg); + code.pxor(reg, reg); else - code->movdqa(reg, code->MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes + code.movdqa(reg, code.MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes return host_loc; } @@ -557,42 +557,42 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) { const size_t bit_width = LocInfo(from).GetMaxBitWidth(); if (HostLocIsXMM(to) && HostLocIsXMM(from)) { - code->movaps(HostLocToXmm(to), HostLocToXmm(from)); + 
code.movaps(HostLocToXmm(to), HostLocToXmm(from)); } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->mov(HostLocToReg64(to), HostLocToReg64(from)); + code.mov(HostLocToReg64(to), HostLocToReg64(from)); } else { - code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32()); + code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32()); } } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->movq(HostLocToXmm(to), HostLocToReg64(from)); + code.movq(HostLocToXmm(to), HostLocToReg64(from)); } else { - code->movd(HostLocToXmm(to), HostLocToReg64(from).cvt32()); + code.movd(HostLocToXmm(to), HostLocToReg64(from).cvt32()); } } else if (HostLocIsGPR(to) && HostLocIsXMM(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->movq(HostLocToReg64(to), HostLocToXmm(from)); + code.movq(HostLocToReg64(to), HostLocToXmm(from)); } else { - code->movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from)); + code.movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from)); } } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) { Xbyak::Address spill_addr = spill_to_addr(from); ASSERT(spill_addr.getBit() >= bit_width); switch (bit_width) { case 128: - code->movaps(HostLocToXmm(to), spill_addr); + code.movaps(HostLocToXmm(to), spill_addr); break; case 64: - code->movsd(HostLocToXmm(to), spill_addr); + code.movsd(HostLocToXmm(to), spill_addr); break; case 32: case 16: case 8: - code->movss(HostLocToXmm(to), spill_addr); + code.movss(HostLocToXmm(to), spill_addr); break; default: UNREACHABLE(); @@ -602,15 +602,15 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) { ASSERT(spill_addr.getBit() >= bit_width); switch (bit_width) { case 128: - code->movaps(spill_addr, HostLocToXmm(from)); + code.movaps(spill_addr, HostLocToXmm(from)); break; case 64: - code->movsd(spill_addr, HostLocToXmm(from)); + code.movsd(spill_addr, HostLocToXmm(from)); break; case 32: case 16: case 8: - code->movss(spill_addr, HostLocToXmm(from)); + code.movss(spill_addr, HostLocToXmm(from)); break; default: UNREACHABLE(); @@ -618,16 +618,16 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) { } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->mov(HostLocToReg64(to), spill_to_addr(from)); + code.mov(HostLocToReg64(to), spill_to_addr(from)); } else { - code->mov(HostLocToReg64(to).cvt32(), spill_to_addr(from)); + code.mov(HostLocToReg64(to).cvt32(), spill_to_addr(from)); } } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) { ASSERT(bit_width != 128); if (bit_width == 64) { - code->mov(spill_to_addr(to), HostLocToReg64(from)); + code.mov(spill_to_addr(to), HostLocToReg64(from)); } else { - code->mov(spill_to_addr(to), HostLocToReg64(from).cvt32()); + code.mov(spill_to_addr(to), HostLocToReg64(from).cvt32()); } } else { ASSERT_MSG(false, "Invalid RegAlloc::EmitMove"); @@ -636,7 +636,7 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) { void RegAlloc::EmitExchange(HostLoc a, HostLoc b) { if (HostLocIsGPR(a) && HostLocIsGPR(b)) { - code->xchg(HostLocToReg64(a), HostLocToReg64(b)); + code.xchg(HostLocToReg64(a), HostLocToReg64(b)); } else if (HostLocIsXMM(a) && HostLocIsXMM(b)) { ASSERT_MSG(false, "Check your code: Exchanging XMM registers is unnecessary"); } else { diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h index b0f60a5b..7ba73873 100644 --- a/src/backend_x64/reg_alloc.h +++ b/src/backend_x64/reg_alloc.h @@ -91,7 +91,7 @@ 
private: class RegAlloc final { public: - explicit RegAlloc(BlockOfCode* code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr) + explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr) : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {} std::array GetArgumentInfo(IR::Inst* inst); @@ -144,7 +144,7 @@ private: HostLocInfo& LocInfo(HostLoc loc); const HostLocInfo& LocInfo(HostLoc loc) const; - BlockOfCode* code = nullptr; + BlockOfCode& code; std::function<Xbyak::Address(HostLoc)> spill_to_addr; void EmitMove(HostLoc to, HostLoc from); void EmitExchange(HostLoc a, HostLoc b);
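
The reg_alloc.h hunk above captures the shape of the whole change: every consumer of BlockOfCode now takes it by reference and stores a reference member, so a null code buffer is unrepresentable and every "->" at a call site becomes a plain ".". A minimal standalone sketch of that pattern, using made-up class names rather than Dynarmic's real ones:

    #include <cstdio>

    struct CodeBuffer {                 // stand-in for BlockOfCode
        void align() { std::puts("align"); }
        void int3()  { std::puts("int3"); }
    };

    class Emitter {                     // stand-in for EmitX64 / RegAlloc
    public:
        explicit Emitter(CodeBuffer& code) : code(code) {}  // caller must supply a live buffer
        void EmitBreakpoint() {
            code.align();               // '.' everywhere the old code had '->'
            code.int3();
        }
    private:
        CodeBuffer& code;               // was: CodeBuffer* code = nullptr;
    };

    int main() {
        CodeBuffer buffer;
        Emitter emitter{buffer};        // bound once; cannot be reseated or null
        emitter.EmitBreakpoint();
    }

One trade-off of a reference member is that the class is no longer copy-assignable by default, which is usually acceptable for objects that are constructed with their dependencies and never rebound.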
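
For readers following the emit_x64_saturation.cpp hunks earlier in this diff: the emitted sequence builds the clamp value from the sign of the first operand (shr 31; add 0x7FFFFFFF yields 0x7FFFFFFF for a non-negative input and 0x80000000 for a negative one) and lets cmovo select it when the add overflows, with seto capturing the Q flag. A scalar C++ model of that logic, purely illustrative and not code from this repository:

    #include <cstdint>
    #include <cstdio>

    static std::int32_t SaturatedAdd32(std::int32_t a, std::int32_t b, bool* overflow_out) {
        // 0x7FFFFFFF if a >= 0, 0x80000000 if a < 0 -- mirrors "shr 31; add 0x7FFFFFFF".
        const std::uint32_t saturated = (static_cast<std::uint32_t>(a) >> 31) + 0x7FFFFFFFu;
        const std::int64_t wide = static_cast<std::int64_t>(a) + b;     // widen to detect signed overflow
        const bool overflow = wide != static_cast<std::int32_t>(wide);  // what "seto" captures
        if (overflow_out)
            *overflow_out = overflow;
        return overflow ? static_cast<std::int32_t>(saturated)          // what "cmovo" selects
                        : static_cast<std::int32_t>(wide);
    }

    int main() {
        bool q = false;
        std::printf("%d %d\n", static_cast<int>(SaturatedAdd32(0x7FFFFFF0, 0x100, &q)),
                    static_cast<int>(q));  // prints: 2147483647 1
    }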
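
Similarly, the EmitPackedSelect fallback path shown above computes (from & ge) | (to & ~ge) with pand/pandn/por, and the BMI1 path folds the negate-and-mask into a single andn. A tiny scalar illustration of that select, with invented names and values:

    #include <cstdint>
    #include <cstdio>

    // Select bytes of "from" where the GE mask is set, bytes of "to" where it is clear.
    static std::uint32_t PackedSelect(std::uint32_t ge, std::uint32_t to, std::uint32_t from) {
        return (from & ge) | (to & ~ge);   // same dataflow as pand / pandn / por
    }

    int main() {
        // The mask keeps the low two bytes from "from" and the high two from "to".
        std::printf("%08X\n", static_cast<unsigned>(
            PackedSelect(0x0000FFFFu, 0xAABBCCDDu, 0x11223344u)));  // prints: AABB3344
    }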