From 2d164d9345cbe00fc223b298a2862b3d74c55948 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Mon, 1 Jan 2018 23:40:34 +0000
Subject: [PATCH] Package up emit context

---
 src/backend_x64/a32_emit_x64.cpp |  348 ++++----
 src/backend_x64/a32_emit_x64.h   |   13 +-
 src/backend_x64/emit_x64.cpp     | 1272 +++++++++++++++---------------
 src/backend_x64/emit_x64.h       |   15 +-
 4 files changed, 846 insertions(+), 802 deletions(-)

diff --git a/src/backend_x64/a32_emit_x64.cpp b/src/backend_x64/a32_emit_x64.cpp
index 88aff084..fd18d074 100644
--- a/src/backend_x64/a32_emit_x64.cpp
+++ b/src/backend_x64/a32_emit_x64.cpp
@@ -49,6 +49,25 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
     ASSERT_MSG(false, "Should never happen.");
 }

+A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block)
+    : EmitContext(reg_alloc, block) {}
+
+A32::LocationDescriptor A32EmitContext::Location() const {
+    return A32::LocationDescriptor{block.Location()};
+}
+
+bool A32EmitContext::FPSCR_RoundTowardsZero() const {
+    return Location().FPSCR().RMode() != A32::FPSCR::RoundingMode::TowardsZero;
+}
+
+bool A32EmitContext::FPSCR_FTZ() const {
+    return Location().FPSCR().FTZ();
+}
+
+bool A32EmitContext::FPSCR_DN() const {
+    return Location().FPSCR().DN();
+}
+
 A32EmitX64::A32EmitX64(BlockOfCode* code, UserCallbacks cb, Jit* jit_interface)
     : EmitX64<A32JitState>(code, cb, jit_interface) {}

@@ -62,6 +81,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     EmitCondPrelude(block);

     RegAlloc reg_alloc{code};
+    A32EmitContext ctx{reg_alloc, block};

     for (auto iter = block.begin(); iter != block.end(); ++iter) {
         IR::Inst* inst = &*iter;
@@ -69,13 +89,13 @@

         // Call the relevant Emit* member function.
         switch (inst->GetOpcode()) {
-#define OPCODE(name, type, ...)                              \
-        case IR::Opcode::name:                               \
-            A32EmitX64::Emit##name(reg_alloc, block, inst);  \
+#define OPCODE(name, type, ...)                              \
+        case IR::Opcode::name:                               \
+            A32EmitX64::Emit##name(ctx, inst);               \
             break;
-#define A32OPC(name, type, ...)                              \
-        case IR::Opcode::A32##name:                          \
-            A32EmitX64::EmitA32##name(reg_alloc, block, inst); \
+#define A32OPC(name, type, ...)                              \
+        case IR::Opcode::A32##name:                          \
+            A32EmitX64::EmitA32##name(ctx, inst);            \
             break;
 #include "frontend/ir/opcodes.inc"
 #undef OPCODE
@@ -108,68 +128,68 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
     return block_desc;
 }

-void A32EmitX64::EmitA32GetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
     A32::Reg reg = inst->GetArg(0).GetA32RegRef();

-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code->mov(result, MJitStateReg(reg));
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32GetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32GetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) {
     A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsSingleExtReg(reg));

-    Xbyak::Xmm result = reg_alloc.ScratchXmm();
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     code->movss(result, MJitStateExtReg(reg));
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32GetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) {
     A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsDoubleExtReg(reg));

-    Xbyak::Xmm result = reg_alloc.ScratchXmm();
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     code->movsd(result, MJitStateExtReg(reg));
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32SetRegister(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     A32::Reg reg = inst->GetArg(0).GetA32RegRef();
     if (args[1].IsImmediate()) {
         code->mov(MJitStateReg(reg), args[1].GetImmediateU32());
     } else if (args[1].IsInXmm()) {
-        Xbyak::Xmm to_store = reg_alloc.UseXmm(args[1]);
+        Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
         code->movd(MJitStateReg(reg), to_store);
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
         code->mov(MJitStateReg(reg), to_store);
     }
 }

-void A32EmitX64::EmitA32SetExtendedRegister32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsSingleExtReg(reg));
     if (args[1].IsInXmm()) {
-        Xbyak::Xmm to_store = reg_alloc.UseXmm(args[1]);
+        Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
         code->movss(MJitStateExtReg(reg), to_store);
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
         code->mov(MJitStateExtReg(reg), to_store);
     }
 }

-void A32EmitX64::EmitA32SetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
     ASSERT(A32::IsDoubleExtReg(reg));
     if (args[1].IsInXmm()) {
-        Xbyak::Xmm to_store = reg_alloc.UseXmm(args[1]);
+        Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
         code->movsd(MJitStateExtReg(reg), to_store);
     } else {
-        Xbyak::Reg64 to_store = reg_alloc.UseGpr(args[1]);
+        Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
         code->mov(MJitStateExtReg(reg), to_store);
     }
 }
@@ -178,11 +198,11 @@ static u32 GetCpsrImpl(A32JitState* jit_state) {
     return jit_state->Cpsr();
 }

-void A32EmitX64::EmitA32GetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
     if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
-        Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
-        Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32();
-        Xbyak::Reg32 c = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 c = ctx.reg_alloc.ScratchGpr().cvt32();

         code->mov(c, dword[r15 + offsetof(A32JitState, CPSR_ge)]);
         // Here we observe that CPSR_q and CPSR_nzcv are right next to each other in memory,
@@ -202,9 +222,9 @@
         code->or_(result, dword[r15 + offsetof(A32JitState, CPSR_jaifm)]);
         code->or_(result, c);

-        reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(inst, result);
     } else {
-        reg_alloc.HostCall(inst);
+        ctx.reg_alloc.HostCall(inst);
         code->mov(code->ABI_PARAM1, code->r15);
         code->CallFunction(&GetCpsrImpl);
     }
@@ -214,36 +234,36 @@ static void SetCpsrImpl(u32 value, A32JitState* jit_state) {
     jit_state->SetCpsr(value);
 }

-void A32EmitX64::EmitA32SetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    reg_alloc.HostCall(nullptr, args[0]);
+void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(nullptr, args[0]);
     code->mov(code->ABI_PARAM2, code->r15);
     code->CallFunction(&SetCpsrImpl);
 }

-void A32EmitX64::EmitA32SetCpsrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         u32 imm = args[0].GetImmediateU32();

         code->mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], u32(imm & 0xF0000000));
     } else {
-        Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code->and_(a, 0xF0000000);
         code->mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], a);
     }
 }

-void A32EmitX64::EmitA32SetCpsrNZCVQ(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         u32 imm = args[0].GetImmediateU32();

         code->mov(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], u32(imm & 0xF0000000));
         code->mov(code->byte[r15 + offsetof(A32JitState, CPSR_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
     } else {
-        Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code->bt(a, 27);
         code->setc(code->byte[r15 + offsetof(A32JitState, CPSR_q)]);
@@ -252,17 +272,17 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(RegAlloc& reg_alloc, IR::Block&, IR::Inst*
     }
 }

-void A32EmitX64::EmitA32GetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+void A32EmitX64::EmitA32GetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]);
     code->shr(result, 31);
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32SetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32SetNFlag(A32EmitContext& ctx, IR::Inst* inst) {
     constexpr size_t flag_bit = 31;
     constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         if (args[0].GetImmediateU1()) {
             code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask);
@@ -270,7 +290,7 @@ void A32EmitX64::EmitA32SetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
             code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code->shl(to_store, flag_bit);
         code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
@@ -278,18 +298,18 @@ void A32EmitX64::EmitA32SetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
         code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store);
     }
 }

-void A32EmitX64::EmitA32GetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+void A32EmitX64::EmitA32GetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]);
     code->shr(result, 30);
     code->and_(result, 1);
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32SetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32SetZFlag(A32EmitContext& ctx, IR::Inst* inst) {
     constexpr size_t flag_bit = 30;
     constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         if (args[0].GetImmediateU1()) {
             code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask);
@@ -297,7 +317,7 @@ void A32EmitX64::EmitA32SetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
             code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code->shl(to_store, flag_bit);
         code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
@@ -305,18 +325,18 @@ void A32EmitX64::EmitA32SetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
         code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store);
     }
 }

-void A32EmitX64::EmitA32GetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]);
     code->shr(result, 29);
     code->and_(result, 1);
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32SetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32SetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
     constexpr size_t flag_bit = 29;
     constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         if (args[0].GetImmediateU1()) {
             code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask);
@@ -324,7 +344,7 @@ void A32EmitX64::EmitA32SetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
             code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code->shl(to_store, flag_bit);
         code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
@@ -332,18 +352,18 @@ void A32EmitX64::EmitA32SetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
         code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store);
     }
 }

-void A32EmitX64::EmitA32GetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+void A32EmitX64::EmitA32GetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code->mov(result, dword[r15 + offsetof(A32JitState, CPSR_nzcv)]);
     code->shr(result, 28);
     code->and_(result, 1);
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32SetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32SetVFlag(A32EmitContext& ctx, IR::Inst* inst) {
     constexpr size_t flag_bit = 28;
     constexpr u32 flag_mask = 1u << flag_bit;
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         if (args[0].GetImmediateU1()) {
             code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], flag_mask);
@@ -351,7 +371,7 @@ void A32EmitX64::EmitA32SetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
             code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
         }
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code->shl(to_store, flag_bit);
         code->and_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], ~flag_mask);
@@ -359,39 +379,39 @@ void A32EmitX64::EmitA32SetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst
         code->or_(dword[r15 + offsetof(A32JitState, CPSR_nzcv)], to_store);
     }
 }

-void A32EmitX64::EmitA32OrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         if (args[0].GetImmediateU1())
             code->mov(dword[r15 + offsetof(A32JitState, CPSR_q)], 1);
     } else {
-        Xbyak::Reg8 to_store = reg_alloc.UseGpr(args[0]).cvt8();
+        Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();

         code->or_(code->byte[r15 + offsetof(A32JitState, CPSR_q)], to_store);
     }
 }

-void A32EmitX64::EmitA32GetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Xmm result = reg_alloc.ScratchXmm();
+void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     code->movd(result, dword[r15 + offsetof(A32JitState, CPSR_ge)]);
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32SetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ASSERT(!args[0].IsImmediate());

     if (args[0].IsInXmm()) {
-        Xbyak::Xmm to_store = reg_alloc.UseXmm(args[0]);
+        Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
         code->movd(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store);
     } else {
-        Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[0]).cvt32();
+        Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
         code->mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], to_store);
     }
 }

-void A32EmitX64::EmitA32SetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         u32 imm = args[0].GetImmediateU32();
         u32 ge = 0;
@@ -402,8 +422,8 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR

         code->mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], ge);
     } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
-        Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

         code->mov(b, 0x01010101);
         code->shr(a, 16);
@@ -411,7 +431,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR
         code->imul(a, a, 0xFF);
         code->mov(dword[r15 + offsetof(A32JitState, CPSR_ge)], a);
     } else {
-        Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code->shr(a, 16);
         code->and_(a, 0xF);
@@ -422,8 +442,8 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR
     }
 }

-void A32EmitX64::EmitA32BXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto& arg = args[0];

     // Pseudocode:
@@ -440,16 +460,16 @@ void A32EmitX64::EmitA32BXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Ins
         u32 new_pc = arg.GetImmediateU32();
         u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
         u32 et = 0;
-        et |= A32::LocationDescriptor{block.Location()}.EFlag() ? 2 : 0;
+        et |= ctx.Location().EFlag() ? 2 : 0;
         et |= Common::Bit<0>(new_pc) ? 1 : 0;

         code->mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
         code->mov(dword[r15 + offsetof(A32JitState, CPSR_et)], et);
     } else {
-        if (A32::LocationDescriptor{block.Location()}.EFlag()) {
-            Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
-            Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
-            Xbyak::Reg32 et = reg_alloc.ScratchGpr().cvt32();
+        if (ctx.Location().EFlag()) {
+            Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
+            Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
+            Xbyak::Reg32 et = ctx.reg_alloc.ScratchGpr().cvt32();

             code->mov(mask, new_pc);
             code->and_(mask, 1);
@@ -459,8 +479,8 @@ void A32EmitX64::EmitA32BXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Ins
             code->and_(new_pc, mask);
             code->mov(MJitStateReg(A32::Reg::PC), new_pc);
         } else {
-            Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
-            Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
+            Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
+            Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();

             code->mov(mask, new_pc);
             code->and_(mask, 1);
@@ -472,16 +492,16 @@ void A32EmitX64::EmitA32BXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Ins
         }
     }
 }

-void A32EmitX64::EmitA32CallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    reg_alloc.HostCall(nullptr);
+void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
+    ctx.reg_alloc.HostCall(nullptr);

     code->SwitchMxcsrOnExit();
     code->mov(code->ABI_PARAM1, qword[r15 + offsetof(A32JitState, cycles_to_run)]);
     code->sub(code->ABI_PARAM1, qword[r15 + offsetof(A32JitState, cycles_remaining)]);
     code->CallFunction(cb.AddTicks);
-    reg_alloc.EndOfAllocScope();
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    reg_alloc.HostCall(nullptr, args[0]);
+    ctx.reg_alloc.EndOfAllocScope();
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(nullptr, args[0]);
     code->CallFunction(cb.CallSVC);
     code->CallFunction(cb.GetTicksRemaining);
     code->mov(qword[r15 + offsetof(A32JitState, cycles_to_run)], code->ABI_RETURN);
@@ -493,8 +513,8 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
     return jit_state->Fpscr();
 }

-void A32EmitX64::EmitA32GetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    reg_alloc.HostCall(inst);
+void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
+    ctx.reg_alloc.HostCall(inst);
     code->mov(code->ABI_PARAM1, code->r15);

     code->stmxcsr(code->dword[code->r15 + offsetof(A32JitState, guest_MXCSR)]);
@@ -505,36 +525,36 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
     jit_state->SetFpscr(value);
 }

-void A32EmitX64::EmitA32SetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    reg_alloc.HostCall(nullptr, args[0]);
+void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(nullptr, args[0]);

     code->mov(code->ABI_PARAM2, code->r15);
     code->CallFunction(&SetFpscrImpl);
     code->ldmxcsr(code->dword[code->r15 + offsetof(A32JitState, guest_MXCSR)]);
 }

-void A32EmitX64::EmitA32GetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
     code->mov(result, dword[r15 + offsetof(A32JitState, FPSCR_nzcv)]);
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }

-void A32EmitX64::EmitA32SetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg32 value = reg_alloc.UseGpr(args[0]).cvt32();
+void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32();

     code->mov(dword[r15 + offsetof(A32JitState, FPSCR_nzcv)], value);
 }

-void A32EmitX64::EmitA32ClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) {
+void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
     code->mov(code->byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
 }

-void A32EmitX64::EmitA32SetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ASSERT(args[1].IsImmediate());
-    Xbyak::Reg32 address = reg_alloc.UseGpr(args[0]).cvt32();
+    Xbyak::Reg32 address = ctx.reg_alloc.UseGpr(args[0]).cvt32();

     code->mov(code->byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1));
     code->mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address);
@@ -644,36 +664,36 @@ static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst,
     code->L(end);
 }

-void A32EmitX64::EmitA32ReadMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ReadMemory(code, reg_alloc, inst, cb, 8, cb.memory.Read8);
+void A32EmitX64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
+    ReadMemory(code, ctx.reg_alloc, inst, cb, 8, cb.memory.Read8);
 }

-void A32EmitX64::EmitA32ReadMemory16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ReadMemory(code, reg_alloc, inst, cb, 16, cb.memory.Read16);
+void A32EmitX64::EmitA32ReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
+    ReadMemory(code, ctx.reg_alloc, inst, cb, 16, cb.memory.Read16);
 }

-void A32EmitX64::EmitA32ReadMemory32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ReadMemory(code, reg_alloc, inst, cb, 32, cb.memory.Read32);
+void A32EmitX64::EmitA32ReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
+    ReadMemory(code, ctx.reg_alloc, inst, cb, 32, cb.memory.Read32);
 }

-void A32EmitX64::EmitA32ReadMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ReadMemory(code, reg_alloc, inst, cb, 64, cb.memory.Read64);
+void A32EmitX64::EmitA32ReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
+    ReadMemory(code, ctx.reg_alloc, inst, cb, 64, cb.memory.Read64);
 }

-void A32EmitX64::EmitA32WriteMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    WriteMemory(code, reg_alloc, inst, cb, 8, cb.memory.Write8);
+void A32EmitX64::EmitA32WriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
+    WriteMemory(code, ctx.reg_alloc, inst, cb, 8, cb.memory.Write8);
 }

-void A32EmitX64::EmitA32WriteMemory16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    WriteMemory(code, reg_alloc, inst, cb, 16, cb.memory.Write16);
+void A32EmitX64::EmitA32WriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
+    WriteMemory(code, ctx.reg_alloc, inst, cb, 16, cb.memory.Write16);
 }

-void A32EmitX64::EmitA32WriteMemory32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    WriteMemory(code, reg_alloc, inst, cb, 32, cb.memory.Write32);
+void A32EmitX64::EmitA32WriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
+    WriteMemory(code, ctx.reg_alloc, inst, cb, 32, cb.memory.Write32);
 }

-void A32EmitX64::EmitA32WriteMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    WriteMemory(code, reg_alloc, inst, cb, 64, cb.memory.Write64);
+void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
+    WriteMemory(code, ctx.reg_alloc, inst, cb, 64, cb.memory.Write64);
 }

 template <typename FunctionPointer>
@@ -709,20 +729,20 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins
     reg_alloc.DefineValue(inst, passed);
 }

-void A32EmitX64::EmitA32ExclusiveWriteMemory8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write8, false);
+void A32EmitX64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWrite(code, ctx.reg_alloc, inst, cb.memory.Write8, false);
 }

-void A32EmitX64::EmitA32ExclusiveWriteMemory16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write16, false);
+void A32EmitX64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWrite(code, ctx.reg_alloc, inst, cb.memory.Write16, false);
 }

-void A32EmitX64::EmitA32ExclusiveWriteMemory32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write32, false);
+void A32EmitX64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWrite(code, ctx.reg_alloc, inst, cb.memory.Write32, false);
 }

-void A32EmitX64::EmitA32ExclusiveWriteMemory64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    ExclusiveWrite(code, reg_alloc, inst, cb.memory.Write64, true);
+void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
+    ExclusiveWrite(code, ctx.reg_alloc, inst, cb.memory.Write64, true);
 }

 static void EmitCoprocessorException() {
     ASSERT_MSG(false, "Should raise coproc exception here");
 }

@@ -740,7 +760,7 @@ static void CallCoprocCallback(BlockOfCode* code, RegAlloc& reg_alloc, Jit* jit_
     code->CallFunction(callback.function);
 }

-void A32EmitX64::EmitA32CoprocInternalOperation(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32CoprocInternalOperation(A32EmitContext& ctx, IR::Inst* inst) {
     auto coproc_info = inst->GetArg(0).GetCoprocInfo();

     size_t coproc_num = coproc_info[0];
@@ -763,11 +783,11 @@ void A32EmitX64::EmitA32CoprocInternalOperation(RegAlloc& reg_alloc, IR::Block&,
         return;
     }

-    CallCoprocCallback(code, reg_alloc, jit_interface, *action);
+    CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action);
 }

-void A32EmitX64::EmitA32CoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32CoprocSendOneWord(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto coproc_info = inst->GetArg(0).GetCoprocInfo();

     size_t coproc_num = coproc_info[0];
@@ -789,13 +809,13 @@ void A32EmitX64::EmitA32CoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::I
         EmitCoprocessorException();
         return;
     case 1:
-        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1]);
+        CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1]);
         return;
     case 2: {
         u32* destination_ptr = boost::get<u32*>(action);

-        Xbyak::Reg32 reg_word = reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr();
+        Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();

         code->mov(reg_destination_addr, reinterpret_cast<u64>(destination_ptr));
         code->mov(code->dword[reg_destination_addr], reg_word);
@@ -807,8 +827,8 @@ void A32EmitX64::EmitA32CoprocSendOneWord(RegAlloc& reg_alloc, IR::Block&, IR::I
     }
 }

-void A32EmitX64::EmitA32CoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32CoprocSendTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto coproc_info = inst->GetArg(0).GetCoprocInfo();

     size_t coproc_num = coproc_info[0];
@@ -828,14 +848,14 @@ void A32EmitX64::EmitA32CoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::
         EmitCoprocessorException();
         return;
     case 1:
-        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1], args[2]);
+        CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), nullptr, args[1], args[2]);
         return;
     case 2: {
         auto destination_ptrs = boost::get<std::array<u32*, 2>>(action);

-        Xbyak::Reg32 reg_word1 = reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg32 reg_word2 = reg_alloc.UseGpr(args[2]).cvt32();
-        Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr();
+        Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(args[2]).cvt32();
+        Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();

         code->mov(reg_destination_addr, reinterpret_cast<u64>(destination_ptrs[0]));
         code->mov(code->dword[reg_destination_addr], reg_word1);
@@ -849,7 +869,7 @@ void A32EmitX64::EmitA32CoprocSendTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::
     }
 }

-void A32EmitX64::EmitA32CoprocGetOneWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32CoprocGetOneWord(A32EmitContext& ctx, IR::Inst* inst) {
     auto coproc_info = inst->GetArg(0).GetCoprocInfo();

     size_t coproc_num = coproc_info[0];
@@ -871,18 +891,18 @@ void A32EmitX64::EmitA32CoprocGetOneWord(RegAlloc& reg_alloc, IR::Block&, IR::In
         EmitCoprocessorException();
         return;
     case 1:
-        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), inst);
+        CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), inst);
         return;
     case 2: {
         u32* source_ptr = boost::get<u32*>(action);

-        Xbyak::Reg32 reg_word = reg_alloc.ScratchGpr().cvt32();
-        Xbyak::Reg64 reg_source_addr = reg_alloc.ScratchGpr();
+        Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr();

         code->mov(reg_source_addr, reinterpret_cast<u64>(source_ptr));
         code->mov(reg_word, code->dword[reg_source_addr]);

-        reg_alloc.DefineValue(inst, reg_word);
+        ctx.reg_alloc.DefineValue(inst, reg_word);

         return;
     }
@@ -891,7 +911,7 @@ void A32EmitX64::EmitA32CoprocGetOneWord(RegAlloc& reg_alloc, IR::Block&, IR::In
     }
 }

-void A32EmitX64::EmitA32CoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
+void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
     auto coproc_info = inst->GetArg(0).GetCoprocInfo();

     size_t coproc_num = coproc_info[0];
@@ -911,14 +931,14 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::I
         EmitCoprocessorException();
         return;
     case 1:
-        CallCoprocCallback(code, reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), inst);
+        CallCoprocCallback(code, ctx.reg_alloc, jit_interface, boost::get<Coprocessor::Callback>(action), inst);
        return;
    case 2: {
        auto source_ptrs = boost::get<std::array<u32*, 2>>(action);

-        Xbyak::Reg64 reg_result = reg_alloc.ScratchGpr();
-        Xbyak::Reg64 reg_destination_addr = reg_alloc.ScratchGpr();
-        Xbyak::Reg64 reg_tmp = reg_alloc.ScratchGpr();
+        Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr();
+        Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
+        Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr();

         code->mov(reg_destination_addr, reinterpret_cast<u64>(source_ptrs[1]));
         code->mov(reg_result.cvt32(), code->dword[reg_destination_addr]);
@@ -927,7 +947,7 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::I
         code->mov(reg_tmp.cvt32(), code->dword[reg_destination_addr]);
         code->or_(reg_result, reg_tmp);

-        reg_alloc.DefineValue(inst, reg_result);
+        ctx.reg_alloc.DefineValue(inst, reg_result);

         return;
     }
@@ -936,8 +956,8 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(RegAlloc& reg_alloc, IR::Block&, IR::I
     }
 }

-void A32EmitX64::EmitA32CoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32CoprocLoadWords(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto coproc_info = inst->GetArg(0).GetCoprocInfo();

     size_t coproc_num = coproc_info[0];
@@ -959,11 +979,11 @@ void A32EmitX64::EmitA32CoprocLoadWords(RegAlloc& reg_alloc, IR::Block&, IR::Ins
         return;
     }

-    CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, args[1]);
+    CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]);
 }

-void A32EmitX64::EmitA32CoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void A32EmitX64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto coproc_info = inst->GetArg(0).GetCoprocInfo();

     size_t coproc_num = coproc_info[0];
@@ -985,7 +1005,7 @@ void A32EmitX64::EmitA32CoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::In
         return;
     }

-    CallCoprocCallback(code, reg_alloc, jit_interface, *action, nullptr, args[1]);
+    CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]);
 }

 void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
diff --git a/src/backend_x64/a32_emit_x64.h b/src/backend_x64/a32_emit_x64.h
index 66625466..1ba10595 100644
--- a/src/backend_x64/a32_emit_x64.h
+++ b/src/backend_x64/a32_emit_x64.h
@@ -15,7 +15,6 @@

 #include <xbyak.h>

-#include "backend_x64/reg_alloc.h"
 #include "backend_x64/emit_x64.h"
 #include "common/address_range.h"
 #include "dynarmic/callbacks.h"
@@ -25,6 +24,16 @@ namespace Dynarmic {
 namespace BackendX64 {

+class RegAlloc;
+
+struct A32EmitContext final : public EmitContext {
+    A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
+    A32::LocationDescriptor Location() const;
+    bool FPSCR_RoundTowardsZero() const override;
+    bool FPSCR_FTZ() const override;
+    bool FPSCR_DN() const override;
+};
+
 class A32EmitX64 final : public EmitX64<A32JitState> {
 public:
     A32EmitX64(BlockOfCode* code, UserCallbacks cb, Jit* jit_interface);
@@ -39,7 +48,7 @@ public:
 protected:
     // Microinstruction emitters
 #define OPCODE(...)
-#define A32OPC(name, type, ...) void EmitA32##name(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst);
+#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst); #include "frontend/ir/opcodes.inc" #undef OPCODE #undef A32OPC diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 967c7ec1..d1d8640b 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -41,7 +41,10 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double -static void EraseInstruction(IR::Block& block, IR::Inst* inst) { +EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block) + : reg_alloc(reg_alloc), block(block) {} + +void EmitContext::EraseInstruction(IR::Inst* inst) { block.Instructions().erase(inst); inst->Invalidate(); } @@ -63,19 +66,19 @@ boost::optional::BlockDescriptor> EmitX64::GetBasicBl } template -void EmitX64::EmitVoid(RegAlloc&, IR::Block&, IR::Inst*) { +void EmitX64::EmitVoid(EmitContext&, IR::Inst*) { } template -void EmitX64::EmitBreakpoint(RegAlloc&, IR::Block&, IR::Inst*) { +void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) { code->int3(); } template -void EmitX64::EmitIdentity(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); +void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (!args[0].IsImmediate()) { - reg_alloc.DefineValue(inst, args[0]); + ctx.reg_alloc.DefineValue(inst, args[0]); } } @@ -104,118 +107,118 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_r } template -void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); +void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[0].IsImmediate()); u64 unique_hash_of_target = args[0].GetImmediateU64(); - reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg64 index_reg = reg_alloc.ScratchGpr(); + ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr(); + Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr(); PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target}); } template -void EmitX64::EmitGetCarryFromOp(RegAlloc&, IR::Block&, IR::Inst*) { +void EmitX64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) { ASSERT_MSG(false, "should never happen"); } template -void EmitX64::EmitGetOverflowFromOp(RegAlloc&, IR::Block&, IR::Inst*) { +void EmitX64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) { ASSERT_MSG(false, "should never happen"); } template -void EmitX64::EmitGetGEFromOp(RegAlloc&, IR::Block&, IR::Inst*) { +void EmitX64::EmitGetGEFromOp(EmitContext&, IR::Inst*) { ASSERT_MSG(false, "should never happen"); } template -void EmitX64::EmitPack2x32To1x64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 lo = reg_alloc.UseScratchGpr(args[0]); - Xbyak::Reg64 hi = reg_alloc.UseScratchGpr(args[1]); +void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 lo = ctx.reg_alloc.UseScratchGpr(args[0]); + Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(args[1]); code->shl(hi, 32); code->mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits code->or_(lo, hi); - reg_alloc.DefineValue(inst, lo); + 
ctx.reg_alloc.DefineValue(inst, lo); } template -void EmitX64::EmitLeastSignificantWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - reg_alloc.DefineValue(inst, args[0]); +void EmitX64::EmitLeastSignificantWord(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.DefineValue(inst, args[0]); } template -void EmitX64::EmitMostSignificantWord(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { +void EmitX64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); code->shr(result, 32); if (carry_inst) { - EraseInstruction(block, carry_inst); - Xbyak::Reg64 carry = reg_alloc.ScratchGpr(); + ctx.EraseInstruction(carry_inst); + Xbyak::Reg64 carry = ctx.reg_alloc.ScratchGpr(); code->setc(carry.cvt8()); - reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(carry_inst, carry); } - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitLeastSignificantHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - reg_alloc.DefineValue(inst, args[0]); +void EmitX64::EmitLeastSignificantHalf(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.DefineValue(inst, args[0]); } template -void EmitX64::EmitLeastSignificantByte(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - reg_alloc.DefineValue(inst, args[0]); +void EmitX64::EmitLeastSignificantByte(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ctx.reg_alloc.DefineValue(inst, args[0]); } template -void EmitX64::EmitMostSignificantBit(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); +void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // TODO: Flag optimization code->shr(result, 31); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitIsZero(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32(); +void EmitX64::EmitIsZero(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // TODO: Flag optimization code->test(result, result); code->sete(result.cvt8()); code->movzx(result, result.cvt8()); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitIsZero64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]); +void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); // TODO: Flag optimization 
code->test(result, result); code->sete(result.cvt8()); code->movzx(result, result.cvt8()); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { +void EmitX64::EmitLogicalShiftLeft(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - auto args = reg_alloc.GetArgumentInfo(inst); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; auto& carry_arg = args[2]; @@ -224,7 +227,7 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, I if (!carry_inst) { if (shift_arg.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { @@ -233,11 +236,11 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, I code->xor_(result, result); } - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } else { - reg_alloc.Use(shift_arg, HostLoc::RCX); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32(); + ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. @@ -247,15 +250,15 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, I code->cmp(code->cl, 32); code->cmovnb(result, zero); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } } else { - EraseInstruction(block, carry_inst); + ctx.EraseInstruction(carry_inst); if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetImmediateU8(); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. @@ -272,12 +275,12 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, I code->and_(carry, 1); } - reg_alloc.DefineValue(inst, result); - reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(carry_inst, carry); } else { - reg_alloc.Use(shift_arg, HostLoc::RCX); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); + ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); // TODO: Optimize this. 
@@ -306,24 +309,24 @@ void EmitX64::EmitLogicalShiftLeft(RegAlloc& reg_alloc, IR::Block& block, I code->outLocalLabel(); - reg_alloc.DefineValue(inst, result); - reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(carry_inst, carry); } } } template -void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { +void EmitX64::EmitLogicalShiftRight(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - auto args = reg_alloc.GetArgumentInfo(inst); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; auto& carry_arg = args[2]; if (!carry_inst) { if (shift_arg.IsImmediate()) { - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { @@ -332,11 +335,11 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, code->xor_(result, result); } - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } else { - reg_alloc.Use(shift_arg, HostLoc::RCX); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32(); + ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. @@ -346,15 +349,15 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, code->cmp(code->cl, 32); code->cmovnb(result, zero); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } } else { - EraseInstruction(block, carry_inst); + ctx.EraseInstruction(carry_inst); if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetImmediateU8(); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. @@ -370,12 +373,12 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, code->xor_(carry, carry); } - reg_alloc.DefineValue(inst, result); - reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(carry_inst, carry); } else { - reg_alloc.Use(shift_arg, HostLoc::RCX); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg32 carry = reg_alloc.UseScratchGpr(carry_arg).cvt32(); + ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); // TODO: Optimize this. 
@@ -406,34 +409,34 @@ void EmitX64::EmitLogicalShiftRight(RegAlloc& reg_alloc, IR::Block& block, code->outLocalLabel(); - reg_alloc.DefineValue(inst, result); - reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(carry_inst, carry); } } } template -void EmitX64::EmitLogicalShiftRight64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); +void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; ASSERT_MSG(shift_arg.IsImmediate(), "variable 64 bit shifts are not implemented"); ASSERT_MSG(shift_arg.GetImmediateU8() < 64, "shift width clamping is not implemented"); - Xbyak::Reg64 result = reg_alloc.UseScratchGpr(operand_arg); + Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); u8 shift = shift_arg.GetImmediateU8(); code->shr(result.cvt64(), shift); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { +void EmitX64::EmitArithmeticShiftRight(EmitContext& ctx, IR::Inst* inst) { auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); - auto args = reg_alloc.GetArgumentInfo(inst); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto& operand_arg = args[0]; auto& shift_arg = args[1]; auto& carry_arg = args[2]; @@ -441,15 +444,15 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& bloc if (!carry_inst) { if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetImmediateU8(); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); code->sar(result, u8(shift < 31 ? shift : 31)); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } else { - reg_alloc.UseScratch(shift_arg, HostLoc::RCX); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg32 const31 = reg_alloc.ScratchGpr().cvt32(); + ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg32 const31 = ctx.reg_alloc.ScratchGpr().cvt32(); // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count. @@ -461,15 +464,15 @@ void EmitX64::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& bloc code->cmovg(code->ecx, const31); code->sar(result, code->cl); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } } else { - EraseInstruction(block, carry_inst); + ctx.EraseInstruction(carry_inst); if (shift_arg.IsImmediate()) { u8 shift = shift_arg.GetImmediateU8(); - Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32(); - Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8(); + Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt8(); if (shift == 0) { // There is nothing more to do. 
@@ -482,12 +485,12 @@ void EmitX64<JST>::EmitArithmeticShiftRight(RegAlloc& reg_alloc, IR::Block& bloc
                 code->setc(carry);
             }
 
-            reg_alloc.DefineValue(inst, result);
-            reg_alloc.DefineValue(carry_inst, carry);
+            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
         } else {
-            reg_alloc.Use(shift_arg, HostLoc::RCX);
-            Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
-            Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8();
+            ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+            Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
+            Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt8();
 
             // TODO: Optimize this.
 
@@ -512,17 +515,17 @@
 
             code->outLocalLabel();
 
-            reg_alloc.DefineValue(inst, result);
-            reg_alloc.DefineValue(carry_inst, carry);
+            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
         }
     }
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitRotateRight(EmitContext& ctx, IR::Inst* inst) {
     auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto& operand_arg = args[0];
     auto& shift_arg = args[1];
     auto& carry_arg = args[2];
 
@@ -530,27 +533,27 @@ void EmitX64<JST>::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::In
     if (!carry_inst) {
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
-            Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
+            Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
 
             code->ror(result, u8(shift & 0x1F));
 
-            reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(inst, result);
         } else {
-            reg_alloc.Use(shift_arg, HostLoc::RCX);
-            Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
+            ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+            Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
 
             // x64 ROR instruction does (shift & 0x1F) for us.
             code->ror(result, code->cl);
 
-            reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(inst, result);
        }
    } else {
-        EraseInstruction(block, carry_inst);
+        ctx.EraseInstruction(carry_inst);
 
         if (shift_arg.IsImmediate()) {
             u8 shift = shift_arg.GetImmediateU8();
-            Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
-            Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8();
+            Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
+            Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt8();
 
             if (shift == 0) {
                 // There is nothing more to do.
@@ -562,12 +565,12 @@ void EmitX64<JST>::EmitRotateRight(RegAlloc& reg_alloc, IR::Block& block, IR::In
                 code->setc(carry);
             }
 
-            reg_alloc.DefineValue(inst, result);
-            reg_alloc.DefineValue(carry_inst, carry);
+            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
         } else {
-            reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
-            Xbyak::Reg32 result = reg_alloc.UseScratchGpr(operand_arg).cvt32();
-            Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(carry_arg).cvt8();
+            ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
+            Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32();
+            Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt8();
 
             // TODO: Optimize
 
@@ -592,33 +595,33 @@
 
             code->outLocalLabel();
 
-            reg_alloc.DefineValue(inst, result);
-            reg_alloc.DefineValue(carry_inst, carry);
+            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(carry_inst, carry);
        }
    }
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitRotateRightExtended(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
     auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg8 carry = reg_alloc.UseScratchGpr(args[1]).cvt8();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(args[1]).cvt8();
 
     code->bt(carry.cvt32(), 0);
     code->rcr(result, 1);
 
     if (carry_inst) {
-        EraseInstruction(block, carry_inst);
+        ctx.EraseInstruction(carry_inst);
 
         code->setc(carry);
 
-        reg_alloc.DefineValue(carry_inst, carry);
+        ctx.reg_alloc.DefineValue(carry_inst, carry);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1);
 
@@ -632,16 +635,16 @@ static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* ca
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitAddWithCarry(EmitContext& ctx, IR::Inst* inst) {
     auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto& carry_in = args[2];
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
-    Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst);
+    Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8();
 
     // TODO: Consider using LEA.
 
@@ -659,7 +662,7 @@ void EmitX64<JST>::EmitAddWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::I
             code->adc(result, op_arg);
         }
     } else {
-        OpArg op_arg = reg_alloc.UseOpArg(args[1]);
+        OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
         op_arg.setBit(32);
         if (carry_in.IsImmediate()) {
             if (carry_in.GetImmediateU1()) {
@@ -675,42 +678,42 @@
     }
 
     if (carry_inst) {
-        EraseInstruction(block, carry_inst);
+        ctx.EraseInstruction(carry_inst);
         code->setc(carry);
-        reg_alloc.DefineValue(carry_inst, carry);
+        ctx.reg_alloc.DefineValue(carry_inst, carry);
     }
     if (overflow_inst) {
-        EraseInstruction(block, overflow_inst);
+        ctx.EraseInstruction(overflow_inst);
         code->seto(overflow);
-        reg_alloc.DefineValue(overflow_inst, overflow);
    }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitAdd64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitAdd64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
-    Xbyak::Reg64 op_arg = reg_alloc.UseGpr(args[1]);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
+    Xbyak::Reg64 op_arg = ctx.reg_alloc.UseGpr(args[1]);
 
     code->add(result, op_arg);
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitSubWithCarry(EmitContext& ctx, IR::Inst* inst) {
     auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto& carry_in = args[2];
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
-    Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst);
+    Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr().cvt8() : INVALID_REG.cvt8();
 
     // TODO: Consider using LEA.
     // TODO: Optimize CMP case.
 
@@ -731,7 +734,7 @@ void EmitX64<JST>::EmitSubWithCarry(RegAlloc& reg_alloc, IR::Block& block, IR::I
             code->sbb(result, op_arg);
         }
     } else {
-        OpArg op_arg = reg_alloc.UseOpArg(args[1]);
+        OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
         op_arg.setBit(32);
         if (carry_in.IsImmediate()) {
             if (carry_in.GetImmediateU1()) {
@@ -748,219 +751,219 @@
     }
 
     if (carry_inst) {
-        EraseInstruction(block, carry_inst);
+        ctx.EraseInstruction(carry_inst);
         code->setnc(carry);
-        reg_alloc.DefineValue(carry_inst, carry);
+        ctx.reg_alloc.DefineValue(carry_inst, carry);
     }
     if (overflow_inst) {
-        EraseInstruction(block, overflow_inst);
+        ctx.EraseInstruction(overflow_inst);
         code->seto(overflow);
-        reg_alloc.DefineValue(overflow_inst, overflow);
    }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
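The carry handling above is worth a second look for reviewers: the add path stores CF with setc while the subtract path uses setnc, because ARM defines the carry-out of a subtraction as "no borrow", the inverse of the x64 borrow flag. A scalar sketch of the semantics being emitted (editor's illustration, not part of the patch; names are hypothetical):

    #include <cstdint>

    // ADC-style add: the carry-out matches x64 CF directly (hence setc).
    std::uint32_t AddWithCarry(std::uint32_t a, std::uint32_t b, bool carry_in, bool& carry_out) {
        const std::uint64_t wide = std::uint64_t(a) + b + (carry_in ? 1 : 0);
        carry_out = (wide >> 32) != 0;
        return static_cast<std::uint32_t>(wide);
    }

    // SBC-style subtract: ARM computes a + ~b + carry_in, and its carry-out
    // means "no borrow occurred", the inverse of x64's CF after SBB (hence setnc).
    std::uint32_t SubWithCarry(std::uint32_t a, std::uint32_t b, bool carry_in, bool& carry_out) {
        const std::uint64_t wide = std::uint64_t(a) + std::uint32_t(~b) + (carry_in ? 1 : 0);
        carry_out = (wide >> 32) != 0;
        return static_cast<std::uint32_t>(wide);
    }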
 
 template <typename JST>
-void EmitX64<JST>::EmitSub64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitSub64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
-    Xbyak::Reg64 op_arg = reg_alloc.UseGpr(args[1]);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
+    Xbyak::Reg64 op_arg = ctx.reg_alloc.UseGpr(args[1]);
 
     code->sub(result, op_arg);
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitMul(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitMul(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
     if (args[1].IsImmediate()) {
         code->imul(result, result, args[1].GetImmediateU32());
     } else {
-        OpArg op_arg = reg_alloc.UseOpArg(args[1]);
+        OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
         op_arg.setBit(32);
 
         code->imul(result, *op_arg);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitMul64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitMul64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
-    OpArg op_arg = reg_alloc.UseOpArg(args[1]);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
+    OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
 
     code->imul(result, *op_arg);
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitAnd(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitAnd(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
     if (args[1].IsImmediate()) {
         u32 op_arg = args[1].GetImmediateU32();
 
         code->and_(result, op_arg);
     } else {
-        OpArg op_arg = reg_alloc.UseOpArg(args[1]);
+        OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
         op_arg.setBit(32);
 
         code->and_(result, *op_arg);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitEor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitEor(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
     if (args[1].IsImmediate()) {
         u32 op_arg = args[1].GetImmediateU32();
 
         code->xor_(result, op_arg);
     } else {
-        OpArg op_arg = reg_alloc.UseOpArg(args[1]);
+        OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
         op_arg.setBit(32);
 
         code->xor_(result, *op_arg);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitOr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitOr(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
     if (args[1].IsImmediate()) {
         u32 op_arg = args[1].GetImmediateU32();
 
         code->or_(result, op_arg);
     } else {
-        OpArg op_arg = reg_alloc.UseOpArg(args[1]);
+        OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
         op_arg.setBit(32);
 
         code->or_(result, *op_arg);
    }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitNot(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitNot(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Reg32 result;
     if (args[0].IsImmediate()) {
-        result = reg_alloc.ScratchGpr().cvt32();
+        result = ctx.reg_alloc.ScratchGpr().cvt32();
 
         code->mov(result, u32(~args[0].GetImmediateU32()));
     } else {
-        result = reg_alloc.UseScratchGpr(args[0]).cvt32();
+        result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
         code->not_(result);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitSignExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
+void EmitX64<JST>::EmitSignExtendWordToLong(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 
     code->movsxd(result.cvt64(), result.cvt32());
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitSignExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
+void EmitX64<JST>::EmitSignExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 
     code->movsx(result.cvt32(), result.cvt16());
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitSignExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
+void EmitX64<JST>::EmitSignExtendByteToWord(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 
     code->movsx(result.cvt32(), result.cvt8());
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitZeroExtendWordToLong(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
+void EmitX64<JST>::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 
     code->mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitZeroExtendHalfToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
+void EmitX64<JST>::EmitZeroExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 
     code->movzx(result.cvt32(), result.cvt16());
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitZeroExtendByteToWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
+void EmitX64<JST>::EmitZeroExtendByteToWord(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 
     code->movzx(result.cvt32(), result.cvt8());
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitByteReverseWord(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
+void EmitX64<JST>::EmitByteReverseWord(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
 
     code->bswap(result);
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitByteReverseHalf(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg16 result = reg_alloc.UseScratchGpr(args[0]).cvt16();
+void EmitX64<JST>::EmitByteReverseHalf(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg16 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt16();
 
     code->rol(result, 8);
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitByteReverseDual(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 result = reg_alloc.UseScratchGpr(args[0]);
+void EmitX64<JST>::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]);
 
     code->bswap(result);
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitCountLeadingZeros(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
-        Xbyak::Reg32 source = reg_alloc.UseGpr(args[0]).cvt32();
-        Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+        Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
 
         code->lzcnt(result, source);
 
-        reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(inst, result);
     } else {
-        Xbyak::Reg32 source = reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
 
         // The result of a bsr of zero is undefined, but zf is set after it.
         code->bsr(result, source);
@@ -969,19 +972,19 @@ void EmitX64<JST>::EmitCountLeadingZeros(RegAlloc& reg_alloc, IR::Block&, IR::In
         code->neg(result);
         code->add(result, 31);
 
-        reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(inst, result);
    }
 }
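The LZCNT-less fallback leans on BSR returning the index of the highest set bit, so clz(x) == 31 - bsr(x) for non-zero x (the neg/add pair above), with x == 0 handled separately since BSR leaves its destination undefined. A scalar model of the intended result (editor's sketch; it assumes the elided part of the hunk forces the zero case to 32):

    #include <cstdint>

    // Index of the most significant set bit, mirroring what BSR computes. x != 0.
    std::uint32_t BitScanReverse(std::uint32_t x) {
        std::uint32_t index = 0;
        while (x >>= 1)
            ++index;
        return index;
    }

    std::uint32_t CountLeadingZeros(std::uint32_t x) {
        if (x == 0)
            return 32;                 // BSR alone cannot produce this case.
        return 31 - BitScanReverse(x); // emitted as: neg result; add result, 31
    }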
 
 template <typename JST>
-void EmitX64<JST>::EmitSignedSaturatedAdd(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 addend = reg_alloc.UseGpr(args[1]).cvt32();
-    Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
 
     code->mov(overflow, result);
     code->shr(overflow, 31);
 
@@ -991,25 +994,25 @@
     code->cmovo(result, overflow);
 
     if (overflow_inst) {
-        EraseInstruction(block, overflow_inst);
+        ctx.EraseInstruction(overflow_inst);
 
         code->seto(overflow.cvt8());
 
-        reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitSignedSaturatedSub(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 result = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 subend = reg_alloc.UseGpr(args[1]).cvt32();
-    Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
 
     code->mov(overflow, result);
     code->shr(overflow, 31);
 
@@ -1019,29 +1022,29 @@ void EmitX64<JST>::EmitSignedSaturatedSub(RegAlloc& reg_alloc, IR::Block& block,
     code->cmovo(result, overflow);
 
     if (overflow_inst) {
-        EraseInstruction(block, overflow_inst);
+        ctx.EraseInstruction(overflow_inst);
 
         code->seto(overflow.cvt8());
 
-        reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitUnsignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     size_t N = args[1].GetImmediateU8();
     ASSERT(N <= 31);
 
     u32 saturated_value = (1u << N) - 1;
 
-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Reg32 reg_a = reg_alloc.UseGpr(args[0]).cvt32();
-    Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
 
     // Pseudocode: result = clamp(reg_a, 0, saturated_value);
     code->xor_(overflow, overflow);
 
@@ -1051,21 +1054,21 @@ void EmitX64<JST>::EmitUnsignedSaturation(RegAlloc& reg_alloc, IR::Block& block,
     code->cmovbe(result, reg_a);
 
     if (overflow_inst) {
-        EraseInstruction(block, overflow_inst);
+        ctx.EraseInstruction(overflow_inst);
 
         code->seta(overflow.cvt8());
 
-        reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
+void EmitX64<JST>::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
 
-    auto args = reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     size_t N = args[1].GetImmediateU8();
     ASSERT(N >= 1 && N <= 32);
 
@@ -1074,7 +1077,7 @@
             auto no_overflow = IR::Value(false);
             overflow_inst->ReplaceUsesWith(no_overflow);
        }
-        reg_alloc.DefineValue(inst, args[0]);
+        ctx.reg_alloc.DefineValue(inst, args[0]);
         return;
     }
 
@@ -1083,10 +1086,10 @@
     u32 negative_saturated_value = 1u << (N - 1);
     u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value);
 
-    Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Reg32 reg_a = reg_alloc.UseGpr(args[0]).cvt32();
-    Xbyak::Reg32 overflow = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
 
     // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
     code->lea(overflow, code->ptr[reg_a.cvt64() + negative_saturated_value]);
 
@@ -1102,31 +1105,31 @@ void EmitX64<JST>::EmitSignedSaturation(RegAlloc& reg_alloc, IR::Block& block, I
     code->cmovbe(result, reg_a);
 
     if (overflow_inst) {
-        EraseInstruction(block, overflow_inst);
+        ctx.EraseInstruction(overflow_inst);
 
         code->seta(overflow.cvt8());
 
-        reg_alloc.DefineValue(overflow_inst, overflow);
+        ctx.reg_alloc.DefineValue(overflow_inst, overflow);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
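The lea/cmp/cmovbe sequence in EmitSignedSaturation is a standard range-check trick: x is representable in N signed bits iff x + 2^(N-1) fits in N unsigned bits, so one unsigned compare both detects overflow and gates the select. A plain C++ rendering of the same logic (editor's sketch of the non-trivial path, assuming 1 <= N <= 31; not the emitted code itself):

    #include <cstdint>

    // Saturate a signed 32-bit value to N bits, reporting whether it overflowed.
    std::int32_t SignedSaturate(std::int32_t x, unsigned N, bool& overflow) {
        const std::int32_t positive_max = (1 << (N - 1)) - 1; // e.g. N=8 ->  127
        const std::int32_t negative_min = -(1 << (N - 1));    // e.g. N=8 -> -128
        // Bias by 2^(N-1): representable values land in [0, 2^N) afterwards.
        const std::uint32_t biased = std::uint32_t(x) + (1u << (N - 1));
        overflow = biased > ((1u << N) - 1);                  // seta in the JIT
        if (!overflow)
            return x;                                         // cmovbe in the JIT
        return x < 0 ? negative_min : positive_max;           // saturate by sign
    }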
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedAddU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     code->paddb(xmm_a, xmm_b);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
-        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
-        Xbyak::Xmm ones = reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+        Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
 
         code->pcmpeqb(ones, ones);
 
@@ -1135,25 +1138,25 @@
         code->pcmpeqb(xmm_ge, xmm_b);
         code->pxor(xmm_ge, ones);
 
-        reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
    }
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedAddS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
-        Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
+        Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
         code->pxor(xmm_ge, xmm_ge);
         code->movdqa(saturated_sum, xmm_a);
 
@@ -1162,30 +1165,30 @@
         code->pcmpeqb(saturated_sum, saturated_sum);
         code->pxor(xmm_ge, saturated_sum);
 
-        reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
    }
 
     code->paddb(xmm_a, xmm_b);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     code->paddw(xmm_a, xmm_b);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
         if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
-            Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
-            Xbyak::Xmm ones = reg_alloc.ScratchXmm();
+            Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+            Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
 
             code->pcmpeqb(ones, ones);
 
@@ -1194,10 +1197,10 @@
             code->pcmpeqw(xmm_ge, xmm_b);
             code->pxor(xmm_ge, ones);
 
-            reg_alloc.DefineValue(ge_inst, xmm_ge);
+            ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         } else {
-            Xbyak::Xmm tmp_a = reg_alloc.ScratchXmm();
-            Xbyak::Xmm tmp_b = reg_alloc.ScratchXmm();
+            Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
+            Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
 
             // !(b <= a+b) == b > a+b
             code->movdqa(tmp_a, xmm_a);
 
@@ -1206,26 +1209,26 @@
             code->paddw(tmp_b, code->MConst(0x80008000));
             code->pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
 
-            reg_alloc.DefineValue(ge_inst, tmp_b);
+            ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
        }
    }
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
-        Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
+        Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
         code->pxor(xmm_ge, xmm_ge);
         code->movdqa(saturated_sum, xmm_a);
 
@@ -1234,52 +1237,52 @@
         code->pcmpeqw(saturated_sum, saturated_sum);
         code->pxor(xmm_ge, saturated_sum);
 
-        reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
    }
 
     code->paddw(xmm_a, xmm_b);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSubU8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
-        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
         code->movdqa(xmm_ge, xmm_a);
         code->pmaxub(xmm_ge, xmm_b);
         code->pcmpeqb(xmm_ge, xmm_a);
 
-        reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
    }
 
     code->psubb(xmm_a, xmm_b);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSubS8(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
-        Xbyak::Xmm saturated_sum = reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
+        Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
         code->pxor(xmm_ge, xmm_ge);
         code->movdqa(saturated_sum, xmm_a);
 
@@ -1288,36 +1291,36 @@
         code->pcmpeqb(saturated_sum, saturated_sum);
         code->pxor(xmm_ge, saturated_sum);
 
-        reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
    }
 
     code->psubb(xmm_a, xmm_b);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
         if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
-            Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
+            Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
             code->movdqa(xmm_ge, xmm_a);
             code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
             code->pcmpeqw(xmm_ge, xmm_a);
 
-            reg_alloc.DefineValue(ge_inst, xmm_ge);
+            ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         } else {
-            Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
-            Xbyak::Xmm ones = reg_alloc.ScratchXmm();
+            Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
+            Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
 
             // (a >= b) == !(b > a)
             code->pcmpeqb(ones, ones);
 
@@ -1327,28 +1330,28 @@
            code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
            code->pxor(xmm_ge, ones);
 
-            reg_alloc.DefineValue(ge_inst, xmm_ge);
+            ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
        }
    }
 
     code->psubw(xmm_a, xmm_b);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
-        Xbyak::Xmm saturated_diff = reg_alloc.ScratchXmm();
-        Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
+        Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
         code->pxor(xmm_ge, xmm_ge);
         code->movdqa(saturated_diff, xmm_a);
 
@@ -1357,22 +1360,22 @@
         code->pcmpeqw(saturated_diff, saturated_diff);
         code->pxor(xmm_ge, saturated_diff);
 
-        reg_alloc.DefineValue(ge_inst, xmm_ge);
+        ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
    }
 
     code->psubw(xmm_a, xmm_b);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     if (args[0].IsInXmm() || args[1].IsInXmm()) {
-        Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm xmm_b = reg_alloc.UseScratchXmm(args[1]);
-        Xbyak::Xmm ones = reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
+        Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
 
         // Since,
        //   pavg(a, b) == (a + b + 1) >> 1
 
@@ -1385,11 +1388,11 @@
         code->pavgb(xmm_a, xmm_b);
         code->pxor(xmm_a, ones);
 
-        reg_alloc.DefineValue(inst, xmm_a);
+        ctx.reg_alloc.DefineValue(inst, xmm_a);
     } else {
-        Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
         Xbyak::Reg32 and_a_b = reg_a;
         Xbyak::Reg32 result = reg_a;
 
@@ -1405,18 +1408,18 @@
         code->and_(xor_a_b, 0x7F7F7F7F);
         code->add(result, xor_a_b);
 
-        reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(inst, result);
    }
 }
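Both branches of EmitPackedHalvingAddU8 rest on the same bit identity: x + y == ((x & y) << 1) + (x ^ y), so the halved sum (x + y) >> 1 equals (x & y) + ((x ^ y) >> 1) and never needs a ninth bit; the XMM path instead complements around PAVGB, which rounds up. A quick scalar check of both formulations (editor's sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    std::uint8_t HalvingAddViaIdentity(std::uint8_t x, std::uint8_t y) {
        // (x + y) >> 1 computed without overflowing the lane.
        return std::uint8_t((x & y) + ((x ^ y) >> 1));
    }

    std::uint8_t HalvingAddViaPavg(std::uint8_t x, std::uint8_t y) {
        // pavg(a, b) == (a + b + 1) >> 1, so ~pavg(~x, ~y) == (x + y) >> 1.
        const std::uint8_t pavg =
            std::uint8_t((std::uint8_t(~x) + std::uint8_t(~y) + 1) >> 1);
        return std::uint8_t(~pavg);
    }

    int main() {
        for (unsigned x = 0; x < 256; ++x)
            for (unsigned y = 0; y < 256; ++y) {
                const std::uint8_t expect = std::uint8_t((x + y) >> 1);
                assert(HalvingAddViaIdentity(std::uint8_t(x), std::uint8_t(y)) == expect);
                assert(HalvingAddViaPavg(std::uint8_t(x), std::uint8_t(y)) == expect);
            }
    }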
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     if (args[0].IsInXmm() || args[1].IsInXmm()) {
-        Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
-        Xbyak::Xmm tmp = reg_alloc.ScratchXmm();
+        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+        Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
 
         code->movdqa(tmp, xmm_a);
         code->pand(xmm_a, xmm_b);
 
@@ -1424,11 +1427,11 @@
         code->psrlw(tmp, 1);
         code->paddw(xmm_a, tmp);
 
-        reg_alloc.DefineValue(inst, xmm_a);
+        ctx.reg_alloc.DefineValue(inst, xmm_a);
     } else {
-        Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
         Xbyak::Reg32 and_a_b = reg_a;
         Xbyak::Reg32 result = reg_a;
 
@@ -1444,20 +1447,20 @@
         code->and_(xor_a_b, 0x7FFF7FFF);
         code->add(result, xor_a_b);
 
-        reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(inst, result);
    }
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 reg_a = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(args[1]).cvt32();
-    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+    Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
     Xbyak::Reg32 and_a_b = reg_a;
     Xbyak::Reg32 result = reg_a;
-    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
 
     // This relies on the equality x+y == ((x&y) << 1) + (x^y).
     // Note that x^y always contains the LSB of the result.
 
@@ -1475,16 +1478,16 @@
     code->add(result, xor_a_b);
     code->xor_(result, carry);
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
-    Xbyak::Xmm tmp = reg_alloc.ScratchXmm();
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
 
     // This relies on the equality x+y == ((x&y) << 1) + (x^y).
     // Note that x^y always contains the LSB of the result.
 
@@ -1497,15 +1500,15 @@
     code->psraw(tmp, 1);
     code->paddw(xmm_a, tmp);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32();
+    Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
 
     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
 
@@ -1528,17 +1531,17 @@
     code->xor_(minuend, 0x80808080);
 
     // minuend now contains the desired result.
-    reg_alloc.DefineValue(inst, minuend);
+    ctx.reg_alloc.DefineValue(inst, minuend);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Reg32 minuend = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(args[1]).cvt32();
+    Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
 
-    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
 
     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
 
@@ -1565,15 +1568,15 @@
     code->xor_(minuend, 0x80808080);
     code->xor_(minuend, carry);
 
-    reg_alloc.DefineValue(inst, minuend);
+    ctx.reg_alloc.DefineValue(inst, minuend);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm minuend = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm subtrahend = reg_alloc.UseScratchXmm(args[1]);
+    Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
 
     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
 
@@ -1589,15 +1592,15 @@
 
     code->psubw(minuend, subtrahend);
 
-    reg_alloc.DefineValue(inst, minuend);
+    ctx.reg_alloc.DefineValue(inst, minuend);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm minuend = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm subtrahend = reg_alloc.UseScratchXmm(args[1]);
+    Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
 
     // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
     // Note that x^y always contains the LSB of the result.
 
@@ -1613,17 +1616,17 @@
 
     code->psubw(minuend, subtrahend);
 
-    reg_alloc.DefineValue(inst, minuend);
+    ctx.reg_alloc.DefineValue(inst, minuend);
 }
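The halving subtractions are the mirror image of the adds: since x ^ y carries the LSB of the true difference, the identity the comments cite lets each lane be halved without borrowing across lanes. An exhaustive scalar check of the claim (editor's sketch, not part of the patch):

    #include <cassert>

    // Verify the identity the halving-subtract emitters rely on:
    //   x - y == (x ^ y) - (((x ^ y) & y) << 1)
    // which holds because (x ^ y) & y picks out exactly the borrow positions.
    int main() {
        for (int x = 0; x < 256; ++x)
            for (int y = 0; y < 256; ++y)
                assert(x - y == (x ^ y) - (((x ^ y) & y) << 1));
    }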
 
-void EmitPackedSubAdd(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
 
-    Xbyak::Reg32 reg_a_hi = reg_alloc.UseScratchGpr(args[0]).cvt32();
-    Xbyak::Reg32 reg_b_hi = reg_alloc.UseScratchGpr(args[1]).cvt32();
-    Xbyak::Reg32 reg_a_lo = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Reg32 reg_b_lo = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+    Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+    Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
     Xbyak::Reg32 reg_sum, reg_diff;
 
     if (is_signed) {
 
@@ -1651,7 +1654,7 @@ void EmitPackedSubAdd(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block,
    }
 
     if (ge_inst) {
-        EraseInstruction(block, ge_inst);
+        ctx.EraseInstruction(ge_inst);
 
         // The reg_b registers are no longer required.
         Xbyak::Reg32 ge_sum = reg_b_hi;
 
@@ -1673,7 +1676,7 @@ void EmitPackedSubAdd(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block,
         code->and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
         code->or_(ge_sum, ge_diff);
 
-        reg_alloc.DefineValue(ge_inst, ge_sum);
+        ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
    }
 
     if (is_halving) {
 
@@ -1687,142 +1690,142 @@ void EmitPackedSubAdd(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block,
 
     // Merge them.
     code->shld(reg_a_hi, reg_a_lo, 16);
 
-    reg_alloc.DefineValue(inst, reg_a_hi);
+    ctx.reg_alloc.DefineValue(inst, reg_a_hi);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedAddSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, true, false, false);
+void EmitX64<JST>::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedSubAdd(code, ctx, inst, true, false, false);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedAddSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, true, true, false);
+void EmitX64<JST>::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedSubAdd(code, ctx, inst, true, true, false);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSubAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, false, false, false);
+void EmitX64<JST>::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedSubAdd(code, ctx, inst, false, false, false);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSubAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, false, true, false);
+void EmitX64<JST>::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedSubAdd(code, ctx, inst, false, true, false);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, true, false, true);
+void EmitX64<JST>::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedSubAdd(code, ctx, inst, true, false, true);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingAddSubS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, true, true, true);
+void EmitX64<JST>::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedSubAdd(code, ctx, inst, true, true, true);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, false, false, true);
+void EmitX64<JST>::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedSubAdd(code, ctx, inst, false, false, true);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedHalvingSubAddS16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    EmitPackedSubAdd(code, reg_alloc, block, inst, false, true, true);
+void EmitX64<JST>::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedSubAdd(code, ctx, inst, false, true, true);
 }
 
-static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+static void EmitPackedOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm xmm_a = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm xmm_b = reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     (code->*fn)(xmm_a, xmm_b);
 
-    reg_alloc.DefineValue(inst, xmm_a);
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
+void EmitX64<JST>::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusb);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddsb);
+void EmitX64<JST>::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsb);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubU8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubusb);
+void EmitX64<JST>::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusb);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsb);
+void EmitX64<JST>::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsb);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusw);
+void EmitX64<JST>::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusw);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedAddS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddsw);
+void EmitX64<JST>::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsw);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubU16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubusw);
+void EmitX64<JST>::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusw);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSaturatedSubS16(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
+void EmitX64<JST>::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsw);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedAbsDiffSumS8(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psadbw);
+void EmitX64<JST>::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
+    EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psadbw);
 }
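EmitPackedOperation keeps the eight saturating emitters above down to one line each by taking the Xbyak instruction as a pointer-to-member-function. The shape of that pattern, reduced to a standalone example (editor's sketch; Emitter stands in for Xbyak::CodeGenerator):

    #include <iostream>

    struct Emitter {
        void paddusb() { std::cout << "paddusb\n"; }
        void psubsw()  { std::cout << "psubsw\n"; }
    };

    // Dispatch through a pointer-to-member, like EmitPackedOperation's fn
    // parameter; the call site uses the same (code->*fn)(...) syntax as the patch.
    void EmitPacked(Emitter* code, void (Emitter::*fn)()) {
        (code->*fn)();
    }

    int main() {
        Emitter e;
        EmitPacked(&e, &Emitter::paddusb);
        EmitPacked(&e, &Emitter::psubsw);
    }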
 
 template <typename JST>
-void EmitX64<JST>::EmitPackedSelect(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+void EmitX64<JST>::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();
 
     if (num_args_in_xmm >= 2) {
-        Xbyak::Xmm ge = reg_alloc.UseScratchXmm(args[0]);
-        Xbyak::Xmm to = reg_alloc.UseXmm(args[1]);
-        Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[2]);
+        Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
+        Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
+        Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);
 
         code->pand(from, ge);
         code->pandn(ge, to);
         code->por(from, ge);
 
-        reg_alloc.DefineValue(inst, from);
+        ctx.reg_alloc.DefineValue(inst, from);
     } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) {
-        Xbyak::Reg32 ge = reg_alloc.UseGpr(args[0]).cvt32();
-        Xbyak::Reg32 to = reg_alloc.UseScratchGpr(args[1]).cvt32();
-        Xbyak::Reg32 from = reg_alloc.UseScratchGpr(args[2]).cvt32();
+        Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+        Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
+        Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
 
         code->and_(from, ge);
         code->andn(to, ge, to);
         code->or_(from, to);
 
-        reg_alloc.DefineValue(inst, from);
+        ctx.reg_alloc.DefineValue(inst, from);
     } else {
-        Xbyak::Reg32 ge = reg_alloc.UseScratchGpr(args[0]).cvt32();
-        Xbyak::Reg32 to = reg_alloc.UseGpr(args[1]).cvt32();
-        Xbyak::Reg32 from = reg_alloc.UseScratchGpr(args[2]).cvt32();
+        Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
+        Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
+        Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
 
         code->and_(from, ge);
         code->not_(ge);
         code->and_(ge, to);
         code->or_(from, ge);
 
-        reg_alloc.DefineValue(inst, from);
+        ctx.reg_alloc.DefineValue(inst, from);
    }
 }
 
@@ -1915,222 +1918,221 @@ static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_
     code->pand(xmm_value, xmm_scratch);
 }
 
-static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm operand = reg_alloc.UseScratchXmm(args[1]);
-    Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
+    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero32(code, result, gpr_scratch);
         DenormalsAreZero32(code, operand, gpr_scratch);
    }
 
     (code->*fn)(result, operand);
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch);
    }
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().DN()) {
+    if (ctx.FPSCR_DN()) {
         DefaultNaN32(code, result);
    }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
-static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Xmm operand = reg_alloc.UseScratchXmm(args[1]);
-    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();
+    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
+    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero64(code, result, gpr_scratch);
         DenormalsAreZero64(code, operand, gpr_scratch);
    }
 
     (code->*fn)(result, operand);
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
    }
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().DN()) {
+    if (ctx.FPSCR_DN()) {
         DefaultNaN64(code, result);
    }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
-static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero32(code, result, gpr_scratch);
    }
 
     (code->*fn)(result, result);
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch);
    }
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().DN()) {
+    if (ctx.FPSCR_DN()) {
         DefaultNaN32(code, result);
    }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
-static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
+static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();
+    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero64(code, result, gpr_scratch);
    }
 
     (code->*fn)(result, result);
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
    }
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().DN()) {
+    if (ctx.FPSCR_DN()) {
         DefaultNaN64(code, result);
    }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
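The FPThreeOp/FPTwoOp helpers now read the FTZ and DN flags through the context instead of re-deriving the A32 location descriptor on every check; the run-time behaviour they wrap is unchanged. As a scalar model of that wrapper (editor's sketch; FlushDenormal and the flag plumbing are illustrative, not the project's API):

    #include <cmath>
    #include <limits>

    float FlushDenormal(float value) {
        // Denormals-are-zero / flush-to-zero: replace subnormals with signed zero.
        return std::fpclassify(value) == FP_SUBNORMAL ? std::copysign(0.0f, value) : value;
    }

    float FPAdd32Model(float a, float b, bool fpscr_ftz, bool fpscr_dn) {
        if (fpscr_ftz) {                 // DenormalsAreZero32 on the inputs
            a = FlushDenormal(a);
            b = FlushDenormal(b);
        }
        float result = a + b;            // the wrapped instruction, e.g. addss
        if (fpscr_ftz)                   // FlushToZero32 on the output
            result = FlushDenormal(result);
        if (fpscr_dn && std::isnan(result))  // DefaultNaN32
            result = std::numeric_limits<float>::quiet_NaN();
        return result;
    }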
inst) { - auto args = reg_alloc.GetArgumentInfo(inst); +void EmitX64::EmitTransferToFP32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate() && args[0].GetImmediateU32() == 0) { - Xbyak::Xmm result = reg_alloc.ScratchXmm(); + Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code->xorps(result, result); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } else { - reg_alloc.DefineValue(inst, args[0]); + ctx.reg_alloc.DefineValue(inst, args[0]); } } template -void EmitX64::EmitTransferToFP64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); +void EmitX64::EmitTransferToFP64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate() && args[0].GetImmediateU64() == 0) { - Xbyak::Xmm result = reg_alloc.ScratchXmm(); + Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); code->xorps(result, result); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } else { - reg_alloc.DefineValue(inst, args[0]); + ctx.reg_alloc.DefineValue(inst, args[0]); } } template -void EmitX64::EmitFPAbs32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); +void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); code->pand(result, code->MConst(f32_non_sign_mask)); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitFPAbs64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); +void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); code->pand(result, code->MConst(f64_non_sign_mask)); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitFPNeg32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); +void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); code->pxor(result, code->MConst(f32_negative_zero)); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitFPNeg64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) { - auto args = reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]); +void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); code->pxor(result, code->MConst(f64_negative_zero)); - reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(inst, result); } template -void EmitX64::EmitFPAdd32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) { - FPThreeOp32(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::addss); +void EmitX64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) { + FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::addss); } template -void EmitX64::EmitFPAdd64(RegAlloc& reg_alloc, IR::Block& block, 
 
 template <typename JST>
-void EmitX64<JST>::EmitFPAdd64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPThreeOp64(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::addsd);
+void EmitX64<JST>::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst) {
+    FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::addsd);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPDiv32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPThreeOp32(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::divss);
+void EmitX64<JST>::EmitFPDiv32(EmitContext& ctx, IR::Inst* inst) {
+    FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::divss);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPDiv64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPThreeOp64(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::divsd);
+void EmitX64<JST>::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
+    FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::divsd);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPMul32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPThreeOp32(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::mulss);
+void EmitX64<JST>::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
+    FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::mulss);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPMul64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPThreeOp64(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::mulsd);
+void EmitX64<JST>::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
+    FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::mulsd);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPSqrt32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPTwoOp32(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::sqrtss);
+void EmitX64<JST>::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
+    FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPSqrt64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPTwoOp64(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::sqrtsd);
+void EmitX64<JST>::EmitFPSqrt64(EmitContext& ctx, IR::Inst* inst) {
+    FPTwoOp64(code, ctx, inst, &Xbyak::CodeGenerator::sqrtsd);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPSub32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPThreeOp32(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::subss);
+void EmitX64<JST>::EmitFPSub32(EmitContext& ctx, IR::Inst* inst) {
+    FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::subss);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPSub64(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    FPThreeOp64(code, reg_alloc, block, inst, &Xbyak::CodeGenerator::subsd);
+void EmitX64<JST>::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
+    FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd);
 }
 
-static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) {
-    reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
-    Xbyak::Reg32 nzcv = reg_alloc.ScratchGpr().cvt32();
-
+static void SetFpscrNzcvFromFlags(BlockOfCode* code, EmitContext& ctx) {
+    ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
+    Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
     code->mov(nzcv, 0x28630000);
     code->sete(cl);
@@ -2141,10 +2143,10 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) {
 }
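Aside: the 0x28630000 constant is a packed lookup table. After ucomiss/ucomisd, ZF and CF (with PF set only in the unordered case) distinguish the four comparison outcomes; sete plus the shift sequence that the hunk elides (conventionally an rcl of cl followed by a shl, hence the RCX reservation) turn those flags into a shift of 0, 4, 8 or 12, and the top nibble of the shifted constant is the ARM NZCV value. A host-side model of that selection; the elided tail is an assumption, only the mov and sete are visible in the hunk.

#include <cassert>
#include <cstdint>

// Select ARM NZCV from the x64 flags produced by ucomiss/ucomisd.
static std::uint32_t NzcvFromHostFlags(bool zf, bool cf) {
    const std::uint32_t table = 0x28630000;
    const unsigned shift = (zf ? 8u : 0u) + (cf ? 4u : 0u); // what sete + rcl build in cl
    return (table << shift) & 0xF0000000;
}

int main() {
    assert(NzcvFromHostFlags(false, false) == 0x20000000); // greater:   ...C
    assert(NzcvFromHostFlags(false, true)  == 0x80000000); // less:      N...
    assert(NzcvFromHostFlags(true,  false) == 0x60000000); // equal:     .ZC.
    assert(NzcvFromHostFlags(true,  true)  == 0x30000000); // unordered: ..CV
}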
 
 template <typename JST>
-void EmitX64<JST>::EmitFPCompare32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm reg_a = reg_alloc.UseXmm(args[0]);
-    Xbyak::Xmm reg_b = reg_alloc.UseXmm(args[1]);
+void EmitX64<JST>::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(args[0]);
+    Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(args[1]);
     bool exc_on_qnan = args[2].GetImmediateU1();
 
     if (exc_on_qnan) {
@@ -2153,14 +2155,14 @@ void EmitX64<JST>::EmitFPCompare32(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in
         code->ucomiss(reg_a, reg_b);
     }
 
-    SetFpscrNzcvFromFlags(code, reg_alloc);
+    SetFpscrNzcvFromFlags(code, ctx);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPCompare64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm reg_a = reg_alloc.UseXmm(args[0]);
-    Xbyak::Xmm reg_b = reg_alloc.UseXmm(args[1]);
+void EmitX64<JST>::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(args[0]);
+    Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(args[1]);
     bool exc_on_qnan = args[2].GetImmediateU1();
 
     if (exc_on_qnan) {
@@ -2169,61 +2171,61 @@ void EmitX64<JST>::EmitFPCompare64(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in
         code->ucomisd(reg_a, reg_b);
     }
 
-    SetFpscrNzcvFromFlags(code, reg_alloc);
+    SetFpscrNzcvFromFlags(code, ctx);
 }
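Aside: exc_on_qnan selects between the two SSE compare flavours (the taken branch is elided by the hunks, but it is presumably the ordered comiss/comisd). The difference is precisely IEEE exception behaviour: the ordered compare raises Invalid Operation for any NaN operand, quiet or signalling, whereas the unordered ucomiss/ucomisd raises it only for signalling NaNs. A standalone check via the matching intrinsics, assuming an x86 host (MXCSR bit 0 is the Invalid flag):

#include <xmmintrin.h>
#include <cstdio>
#include <limits>

int main() {
    const __m128 qnan = _mm_set_ss(std::numeric_limits<float>::quiet_NaN());
    const __m128 one  = _mm_set_ss(1.0f);

    _mm_setcsr(_mm_getcsr() & ~0x3Fu); // clear MXCSR exception flags
    (void)_mm_ucomieq_ss(qnan, one);   // ucomiss: quiet NaN does not signal
    std::printf("ucomiss invalid: %u\n", _mm_getcsr() & 1u); // expected 0

    _mm_setcsr(_mm_getcsr() & ~0x3Fu);
    (void)_mm_comieq_ss(qnan, one);    // comiss: quiet NaN signals Invalid
    std::printf("comiss  invalid: %u\n", _mm_getcsr() & 1u); // expected 1
}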
 
 template <typename JST>
-void EmitX64<JST>::EmitFPSingleToDouble(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();
+void EmitX64<JST>::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero32(code, result, gpr_scratch.cvt32());
     }
     code->cvtss2sd(result, result);
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
     }
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().DN()) {
+    if (ctx.FPSCR_DN()) {
         DefaultNaN64(code, result);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPDoubleToSingle(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm result = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();
+void EmitX64<JST>::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero64(code, result, gpr_scratch);
     }
     code->cvtsd2ss(result, result);
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch.cvt32());
     }
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().DN()) {
+    if (ctx.FPSCR_DN()) {
         DefaultNaN32(code, result);
     }
 
-    reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm();
+void EmitX64<JST>::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
     bool round_towards_zero = args[1].GetImmediateU1();
 
     // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
     // Conversion to double is lossless, and allows for clamping.
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero32(code, from, to);
     }
     code->cvtss2sd(from, from);
@@ -2244,15 +2246,15 @@ void EmitX64<JST>::EmitFPSingleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::
         code->cvtsd2si(to, from); // 32 bit gpr
     }
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm();
+void EmitX64<JST>::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
     bool round_towards_zero = args[1].GetImmediateU1();
 
     // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
@@ -2262,8 +2264,8 @@ void EmitX64<JST>::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::
     //
     // FIXME: Inexact exception not correctly signalled with the below code
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().RMode() != A32::FPSCR::RoundingMode::TowardsZero && !round_towards_zero) {
-        if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (!ctx.FPSCR_RoundTowardsZero() && !round_towards_zero) {
+        if (ctx.FPSCR_FTZ()) {
             DenormalsAreZero32(code, from, to);
         }
         code->cvtss2sd(from, from);
@@ -2280,10 +2282,10 @@ void EmitX64<JST>::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::
         // Bring back into original range
        code->add(to, u32(2147483648u));
     } else {
-        Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm();
-        Xbyak::Reg32 gpr_mask = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm();
+        Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32();
 
-        if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+        if (ctx.FPSCR_FTZ()) {
            DenormalsAreZero32(code, from, to);
         }
         code->cvtss2sd(from, from);
@@ -2307,21 +2309,21 @@ void EmitX64<JST>::EmitFPSingleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::
         code->add(to, gpr_mask);
     }
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
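Aside: the comment above is the heart of the conversion strategy. x64's cvtsd2si returns the sentinel 0x80000000 on overflow, while ARM saturates, so the emitted code widens to double (exact for every binary32 value) and clamps before converting. A host-side model of the saturating float-to-s32 path; it truncates where the JIT honours the selected rounding mode, and it applies ARM's NaN-converts-to-zero rule up front.

#include <cmath>
#include <cstdint>
#include <cstdio>

static std::int32_t SaturatingFloatToS32(float value) {
    const double d = static_cast<double>(value); // lossless widening
    if (std::isnan(d)) return 0;                 // ARM: NaN converts to 0
    if (d > 2147483647.0) return INT32_MAX;      // clamp high
    if (d < -2147483648.0) return INT32_MIN;     // clamp low
    return static_cast<std::int32_t>(d);         // truncation stands in for FPSCR rounding
}

int main() {
    std::printf("%d\n", SaturatingFloatToS32(3.0e9f));  // 2147483647
    std::printf("%d\n", SaturatingFloatToS32(-3.0e9f)); // -2147483648
}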
 
 template <typename JST>
-void EmitX64<JST>::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm();
-    Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32();
+void EmitX64<JST>::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
+    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
     bool round_towards_zero = args[1].GetImmediateU1();
 
     // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero64(code, from, gpr_scratch.cvt64());
     }
     // First time is to set flags
@@ -2341,24 +2343,24 @@ void EmitX64<JST>::EmitFPDoubleToS32(RegAlloc& reg_alloc, IR::Block& block, IR::
         code->cvtsd2si(to, from); // 32 bit gpr
     }
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = reg_alloc.ScratchXmm();
-    Xbyak::Reg32 gpr_scratch = reg_alloc.ScratchGpr().cvt32();
+void EmitX64<JST>::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
+    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
     bool round_towards_zero = args[1].GetImmediateU1();
 
     // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
     // TODO: Use VCVTPD2UDQ when AVX512VL is available.
     // FIXME: Inexact exception not correctly signalled with the below code
 
-    if (A32::LocationDescriptor{block.Location()}.FPSCR().RMode() != A32::FPSCR::RoundingMode::TowardsZero && !round_towards_zero) {
-        if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+    if (!ctx.FPSCR_RoundTowardsZero() && !round_towards_zero) {
+        if (ctx.FPSCR_FTZ()) {
            DenormalsAreZero64(code, from, gpr_scratch.cvt64());
         }
         ZeroIfNaN64(code, from, xmm_scratch);
@@ -2374,10 +2376,10 @@ void EmitX64<JST>::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::
         // Bring back into original range
         code->add(to, u32(2147483648u));
     } else {
-        Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm();
-        Xbyak::Reg32 gpr_mask = reg_alloc.ScratchGpr().cvt32();
+        Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm();
+        Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32();
 
-        if (A32::LocationDescriptor{block.Location()}.FPSCR().FTZ()) {
+        if (ctx.FPSCR_FTZ()) {
             DenormalsAreZero64(code, from, gpr_scratch.cvt64());
         }
         ZeroIfNaN64(code, from, xmm_scratch);
@@ -2400,27 +2402,27 @@ void EmitX64<JST>::EmitFPDoubleToU32(RegAlloc& reg_alloc, IR::Block& block, IR::
         code->add(to, gpr_mask);
     }
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPS32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg32 from = reg_alloc.UseGpr(args[0]).cvt32();
-    Xbyak::Xmm to = reg_alloc.ScratchXmm();
+void EmitX64<JST>::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
     bool round_to_nearest = args[1].GetImmediateU1();
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
     code->cvtsi2ss(to, from);
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
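Aside: the "Bring back into original range" step visible above exists because SSE2 only has signed conversions (hence the TODO about AVX-512's VCVTPD2UDQ). Inputs at or above 2^31 are biased down by 2^31, converted with the signed cvtsd2si, and the bias is re-added as the u32(2147483648u) addition. A host-side model of the double-to-u32 path, with ARM-style saturation at the edges:

#include <cmath>
#include <cstdint>
#include <cstdio>

static std::uint32_t DoubleToU32ViaSignedConvert(double d) {
    if (std::isnan(d)) return 0;              // ARM: NaN converts to 0
    if (d >= 4294967296.0) return UINT32_MAX; // saturate high
    if (d < 0.0) return 0;                    // saturate low
    if (d >= 2147483648.0) {
        // Bias into signed range, convert, then bring back into original range.
        const std::int32_t biased = static_cast<std::int32_t>(d - 2147483648.0);
        return static_cast<std::uint32_t>(biased) + 2147483648u;
    }
    return static_cast<std::uint32_t>(static_cast<std::int32_t>(d));
}

int main() {
    std::printf("%u\n", DoubleToU32ViaSignedConvert(4000000000.0)); // 4000000000
    std::printf("%u\n", DoubleToU32ViaSignedConvert(-5.0));         // 0
}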
 
 template <typename JST>
-void EmitX64<JST>::EmitFPU32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 from = reg_alloc.UseGpr(args[0]);
-    Xbyak::Xmm to = reg_alloc.ScratchXmm();
+void EmitX64<JST>::EmitFPU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
     bool round_to_nearest = args[1].GetImmediateU1();
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
@@ -2428,27 +2430,27 @@ void EmitX64<JST>::EmitFPU32ToSingle(RegAlloc& reg_alloc, IR::Block&, IR::Inst*
     code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
     code->cvtsi2ss(to, from);
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPS32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg32 from = reg_alloc.UseGpr(args[0]).cvt32();
-    Xbyak::Xmm to = reg_alloc.ScratchXmm();
+void EmitX64<JST>::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32();
+    Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
     bool round_to_nearest = args[1].GetImmediateU1();
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
     code->cvtsi2sd(to, from);
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
 
 template <typename JST>
-void EmitX64<JST>::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
-    auto args = reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 from = reg_alloc.UseGpr(args[0]);
-    Xbyak::Xmm to = reg_alloc.ScratchXmm();
+void EmitX64<JST>::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
     bool round_to_nearest = args[1].GetImmediateU1();
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
@@ -2456,7 +2458,7 @@ void EmitX64<JST>::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst*
     code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
     code->cvtsi2sd(to, from);
 
-    reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.DefineValue(inst, to);
 }
 
 template <typename JST>
diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h
index 979d5a55..2d413c96 100644
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@@ -34,6 +34,19 @@ namespace BackendX64 {
 
 class BlockOfCode;
 
+struct EmitContext {
+    EmitContext(RegAlloc& reg_alloc, IR::Block& block);
+
+    void EraseInstruction(IR::Inst* inst);
+
+    virtual bool FPSCR_RoundTowardsZero() const = 0;
+    virtual bool FPSCR_FTZ() const = 0;
+    virtual bool FPSCR_DN() const = 0;
+
+    RegAlloc& reg_alloc;
+    IR::Block& block;
+};
+
 template <typename JST>
 class EmitX64 {
 public:
@@ -58,7 +71,7 @@ public:
 
 protected:
     // Microinstruction emitters
-#define OPCODE(name, type, ...) void Emit##name(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst);
+#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
 #define A32OPC(...)
 #include "frontend/ir/opcodes.inc"
 #undef OPCODE
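A closing note on the header change: every OPCODE entry now declares void Emit##name(EmitContext& ctx, IR::Inst* inst) in place of the old three-argument form, and the pure-virtual FPSCR accessors are what let the shared floating-point emitters above stay frontend-agnostic. A minimal standalone illustration of that pattern (hypothetical class and flag values, not dynarmic code):

#include <cstdio>

// Stand-in for the EmitContext interface: shared emitters query FPSCR state
// through virtuals, so they need no knowledge of which frontend built the block.
struct EmitContextLike {
    virtual ~EmitContextLike() = default;
    virtual bool FPSCR_FTZ() const = 0;
    virtual bool FPSCR_DN() const = 0;
};

// A frontend-specific context supplies the actual FPSCR bits.
struct TestEmitContext : EmitContextLike {
    bool ftz, dn;
    TestEmitContext(bool ftz_, bool dn_) : ftz(ftz_), dn(dn_) {}
    bool FPSCR_FTZ() const override { return ftz; }
    bool FPSCR_DN() const override { return dn; }
};

// A shared "emitter" depends only on the interface, mirroring FPTwoOp32 above.
static void DescribeEmit(const EmitContextLike& ctx) {
    std::printf("flush-to-zero: %d, default-nan: %d\n", ctx.FPSCR_FTZ(), ctx.FPSCR_DN());
}

int main() {
    TestEmitContext ctx{true, false};
    DescribeEmit(ctx);
}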