From 917335ae8a29359b57f30ea0846fe7d7bcbfbff7 Mon Sep 17 00:00:00 2001
From: Wunkolo <wunkolo@gmail.com>
Date: Tue, 2 Jan 2024 12:55:37 -0800
Subject: [PATCH] block_of_code: Add `XmmBConst`

This is a redo of https://github.com/merryhime/dynarmic/pull/690 with a
much smaller footprint to introduce a new pattern while avoiding the
initial bugs
(https://github.com/merryhime/dynarmic/commit/5d9b720189a64eec7f35f844320d0b30ca3997f3)

**B**roadcasts a value as an **Xmm**-sized **Const**ant. Intended to
eventually encourage more hits within the constant-pool between vector
and non-vector code.
---
 src/dynarmic/backend/x64/block_of_code.h      |  7 ++++
 .../backend/x64/emit_x64_floating_point.cpp   |  3 +-
 .../x64/emit_x64_vector_floating_point.cpp    | 38 ++++++-------------
 .../x64/emit_x64_vector_saturation.cpp        |  4 +-
 4 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/src/dynarmic/backend/x64/block_of_code.h b/src/dynarmic/backend/x64/block_of_code.h
index c6223c02..013ae74c 100644
--- a/src/dynarmic/backend/x64/block_of_code.h
+++ b/src/dynarmic/backend/x64/block_of_code.h
@@ -10,6 +10,7 @@
 #include <memory>
 #include <type_traits>
 
+#include <mcl/bit/swizzle.hpp>
 #include <mcl/stdint.hpp>
 #include <xbyak/xbyak.h>
 #include <xbyak/xbyak_util.h>
@@ -124,6 +125,12 @@ public:
     Xbyak::Address XmmConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
 
+    template<size_t esize>
+    Xbyak::Address XmmBConst(const Xbyak::AddressFrame& frame, u64 value) {
+        return XmmConst(frame, mcl::bit::replicate_element(esize, value),
+                        mcl::bit::replicate_element(esize, value));
+    }
+
     CodePtr GetCodeBegin() const;
     size_t GetTotalCodeSize() const;
 
diff --git a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
index 21149139..c2e58d81 100644
--- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
@@ -89,10 +89,9 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list<Xbyak::Xmm> t
             FpFixup::Norm_Src,
             FpFixup::Norm_Src,
             FpFixup::Norm_Src);
-        constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element<fsize, u64>(denormal_to_zero);
 
         const Xbyak::Xmm tmp = xmm16;
-        FCODE(vmovap)(tmp, code.XmmConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64)));
+        FCODE(vmovap)(tmp, code.XmmBConst<fsize>(xword, denormal_to_zero));
 
         for (const Xbyak::Xmm& xmm : to_daz) {
             FCODE(vfixupimms)(xmm, xmm, tmp, u8(0));
 
diff --git a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
index d5e01dd3..1ff6dec7 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
@@ -145,26 +145,12 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
 
 template<size_t fsize>
 Xbyak::Address GetVectorOf(BlockOfCode& code, u64 value) {
-    if constexpr (fsize == 16) {
-        return code.XmmConst(xword, (value << 48) | (value << 32) | (value << 16) | value, (value << 48) | (value << 32) | (value << 16) | value);
-    } else if constexpr (fsize == 32) {
-        return code.XmmConst(xword, (value << 32) | value, (value << 32) | value);
-    } else {
-        static_assert(fsize == 64);
-        return code.XmmConst(xword, value, value);
-    }
+    return code.XmmBConst<fsize>(xword, value);
 }
 
 template<size_t fsize, u64 value>
 Xbyak::Address GetVectorOf(BlockOfCode& code) {
-    if constexpr (fsize == 16) {
-        return code.XmmConst(xword, (value << 48) | (value << 32) | (value << 16) | value, (value << 48) | (value << 32) | (value << 16) | value);
-    } else if constexpr (fsize == 32) {
-        return code.XmmConst(xword, (value << 32) | value, (value << 32) | value);
-    } else {
-        static_assert(fsize == 64);
-        return code.XmmConst(xword, value, value);
-    }
+    return code.XmmBConst<fsize>(xword, value);
 }
 
 template<size_t fsize>
@@ -227,7 +213,7 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
     if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
         constexpr u32 nan_to_zero = FixupLUT(FpFixup::PosZero,
                                              FpFixup::PosZero);
-        FCODE(vfixupimmp)(result, result, code.XmmConst(ptr_b, u64(nan_to_zero)), u8(0));
+        FCODE(vfixupimmp)(result, result, code.XmmBConst<32>(ptr_b, nan_to_zero), u8(0));
     } else if (code.HasHostFeature(HostFeature::AVX)) {
         FCODE(vcmpordp)(nan_mask, result, result);
         FCODE(vandp)(result, result, nan_mask);
@@ -251,9 +237,8 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xb
             FpFixup::Norm_Src,
             FpFixup::Norm_Src);
 
         const Xbyak::Xmm tmp = xmm16;
-        constexpr u64 denormal_to_zero64 = mcl::bit::replicate_element<fsize, u64>(denormal_to_zero);
-        FCODE(vmovap)(tmp, code.XmmConst(xword, u64(denormal_to_zero64), u64(denormal_to_zero64)));
+        FCODE(vmovap)(tmp, code.XmmBConst<fsize>(xword, denormal_to_zero));
 
         for (const Xbyak::Xmm& xmm : to_daz) {
             FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
@@ -800,9 +785,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
         if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
             code.vcvtudq2ps(xmm, xmm);
         } else {
-            const Xbyak::Address mem_4B000000 = code.XmmConst(xword, 0x4B0000004B000000, 0x4B0000004B000000);
-            const Xbyak::Address mem_53000000 = code.XmmConst(xword, 0x5300000053000000, 0x5300000053000000);
-            const Xbyak::Address mem_D3000080 = code.XmmConst(xword, 0xD3000080D3000080, 0xD3000080D3000080);
+            const Xbyak::Address mem_4B000000 = code.XmmBConst<32>(xword, 0x4B000000);
+            const Xbyak::Address mem_53000000 = code.XmmBConst<32>(xword, 0x53000000);
+            const Xbyak::Address mem_D3000080 = code.XmmBConst<32>(xword, 0xD3000080);
 
             const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
 
@@ -813,7 +798,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
                 code.vaddps(xmm, xmm, mem_D3000080);
                 code.vaddps(xmm, tmp, xmm);
             } else {
-                const Xbyak::Address mem_0xFFFF = code.XmmConst(xword, 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF);
+                const Xbyak::Address mem_0xFFFF = code.XmmBConst<32>(xword, 0x0000FFFF);
 
                 code.movdqa(tmp, mem_0xFFFF);
@@ -831,7 +816,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
         }
 
         if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-            code.pand(xmm, code.XmmConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
+            code.pand(xmm, code.XmmBConst<32>(xword, 0x7FFFFFFF));
         }
     });
 
@@ -898,7 +883,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
         }
 
         if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-            code.pand(xmm, code.XmmConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
+            code.pand(xmm, code.XmmBConst<64>(xword, 0x7FFFFFFFFFFFFFFF));
         }
     });
 
@@ -1504,12 +1489,11 @@
 template<size_t fsize>
 void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mcl::unsigned_integer_of_size<fsize>;
     constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
-    constexpr u64 sign_mask64 = mcl::bit::replicate_element<fsize, u64>(sign_mask);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Address mask = code.XmmConst(xword, sign_mask64, sign_mask64);
+    const Xbyak::Address mask = code.XmmBConst<fsize>(xword, sign_mask);
 
     code.xorps(a, mask);
 
diff --git a/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp b/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
index 9d869d72..398f1669 100644
--- a/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp
@@ -97,7 +97,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
             code.vpmovq2m(k1, xmm0);
         }
         ICODE(vpsra)(result | k1, result, u8(esize - 1));
-        ICODE(vpxor)(result | k1, result, code.XmmConst(xword_b, msb_mask, msb_mask));
+        ICODE(vpxor)(result | k1, result, code.XmmBConst<esize>(xword_b, msb_mask));
 
         code.ktestb(k1, k1);
         code.setnz(overflow);
@@ -148,7 +148,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     if constexpr (esize == 64) {
         code.pshufd(tmp, tmp, 0b11110101);
     }
-    code.pxor(tmp, code.XmmConst(xword, msb_mask, msb_mask));
+    code.pxor(tmp, code.XmmBConst<esize>(xword, msb_mask));
 
     if (code.HasHostFeature(HostFeature::SSE41)) {
         code.ptest(xmm0, code.XmmConst(xword, msb_mask, msb_mask));