diff --git a/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/backend/x64/emit_x64.cpp index ab160eca..f203d104 100644 --- a/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/backend/x64/emit_x64.cpp @@ -32,6 +32,8 @@ using namespace Xbyak::util; EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block) : reg_alloc(reg_alloc), block(block) {} +EmitContext::~EmitContext() = default; + size_t EmitContext::GetInstOffset(IR::Inst* inst) const { return static_cast(std::distance(block.begin(), IR::Block::iterator(inst))); } diff --git a/src/dynarmic/backend/x64/emit_x64.h b/src/dynarmic/backend/x64/emit_x64.h index 24fbcdc3..6547869d 100644 --- a/src/dynarmic/backend/x64/emit_x64.h +++ b/src/dynarmic/backend/x64/emit_x64.h @@ -51,6 +51,7 @@ using HalfVectorArray = std::array(stack_space + ABI_SHADOW_SPACE)); + for (size_t i = 0; i < xmms.size(); ++i) { + code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], xmms[i]); + } + code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.mov(code.ABI_PARAM2, ctx.FPCR(fpcr_controlled).Value()); - code.CallFunction(nan_handler); + code.CallFunction(nan_handler); - code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - code.add(rsp, stack_space + ABI_SHADOW_SPACE); - ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); - code.add(rsp, 8); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.add(rsp, static_cast(stack_space + ABI_SHADOW_SPACE)); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + code.add(rsp, 8); + code.jmp(*end, code.T_NEAR); + }); } template @@ -1117,7 +1116,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - Xbyak::Label end, fallback; + SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.movaps(result, xmm_a); @@ -1127,19 +1126,19 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.andnps(tmp, result); FCODE(vcmpeq_uqp)(tmp, tmp, GetSmallestNormalVector(code)); code.vptest(tmp, tmp); - code.jnz(fallback, code.T_NEAR); - code.L(end); + code.jnz(*fallback, code.T_NEAR); + code.L(*end); }); - code.SwitchToFarCode(); - code.L(fallback); - code.sub(rsp, 8); - ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); - EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn, fpcr_controlled); - ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); - code.add(rsp, 8); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); + ctx.deferred_emits.emplace_back([=, &code, &ctx] { + code.L(*fallback); + code.sub(rsp, 8); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn, fpcr_controlled); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + code.add(rsp, 8); + code.jmp(*end, code.T_NEAR); + }); ctx.reg_alloc.DefineValue(inst, result); return; @@ -1377,7 +1376,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - Xbyak::Label end, fallback; + SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.movaps(result, GetVectorOf(code)); @@ -1385,19 +1384,19 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in FCODE(vcmpunordp)(tmp, result, result); code.vptest(tmp, tmp); - code.jnz(fallback, code.T_NEAR); - code.L(end); + code.jnz(*fallback, code.T_NEAR); + code.L(*end); }); - code.SwitchToFarCode(); - code.L(fallback); - code.sub(rsp, 8); - ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); - EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled); - ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); - code.add(rsp, 8); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); + ctx.deferred_emits.emplace_back([=, &code, &ctx] { + code.L(*fallback); + code.sub(rsp, 8); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + code.add(rsp, 8); + code.jmp(*end, code.T_NEAR); + }); ctx.reg_alloc.DefineValue(inst, result); return; @@ -1591,7 +1590,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); - Xbyak::Label end, fallback; + SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.vmovaps(result, GetVectorOf(code)); @@ -1602,21 +1601,21 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in FCODE(vandp)(tmp, result, mask); ICODE(vpcmpeq)(tmp, tmp, mask); code.ptest(tmp, tmp); - code.jnz(fallback, code.T_NEAR); + code.jnz(*fallback, code.T_NEAR); FCODE(vmulp)(result, result, GetVectorOf(code)); - code.L(end); + code.L(*end); }); - code.SwitchToFarCode(); - code.L(fallback); - code.sub(rsp, 8); - ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); - EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled); - ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); - code.add(rsp, 8); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); + ctx.deferred_emits.emplace_back([=, &code, &ctx] { + code.L(*fallback); + code.sub(rsp, 8); + ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, operand1, operand2, fallback_fn, fpcr_controlled); + ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx())); + code.add(rsp, 8); + code.jmp(*end, code.T_NEAR); + }); ctx.reg_alloc.DefineValue(inst, result); return;