From 776208742b22558e16406a8afd32538f89e77112 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Wed, 9 Jun 2021 09:44:55 -0700 Subject: [PATCH] emit_x64_{vector_}floating_point: Centralize implementation of FP{Vector}{Abs,Neg} Removes dependency on the constants at the top of some files such as `f16_negative_zero` and `f32_non_sign_mask` in favor of the `FPInfo` trait-type. Also removes bypass delays by selecting between instructions such as `pand`, `andps`, or `andpd` depending on the type and keeps them in their respective uop domain. See https://www.agner.org/optimize/instruction_tables.pdf for more info on bypass delays. --- .../backend/x64/emit_x64_floating_point.cpp | 55 +++++++-------- .../x64/emit_x64_vector_floating_point.cpp | 70 ++++++++----------- 2 files changed, 55 insertions(+), 70 deletions(-) diff --git a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index dfe834fa..5e740469 100644 --- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -39,9 +39,6 @@ namespace { const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1); -constexpr u64 f16_negative_zero = 0x8000; -constexpr u64 f16_non_sign_mask = 0x7fff; - constexpr u64 f32_negative_zero = 0x80000000u; constexpr u64 f32_nan = 0x7fc00000u; constexpr u64 f32_non_sign_mask = 0x7fffffffu; @@ -328,58 +325,56 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) } // anonymous namespace -void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) { +template<size_t fsize> +void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size<fsize>; + constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Address mask = code.MConst(xword, non_sign_mask); - code.pand(result, code.MConst(xword, 
f16_non_sign_mask)); + code.andps(result, mask); ctx.reg_alloc.DefineValue(inst, result); } +void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) { + FPAbs<16>(code, ctx, inst); +} + void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pand(result, code.MConst(xword, f32_non_sign_mask)); - - ctx.reg_alloc.DefineValue(inst, result); + FPAbs<32>(code, ctx, inst); } void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) { + FPAbs<64>(code, ctx, inst); +} + +template<size_t fsize> +void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size<fsize>; + constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask; + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Address mask = code.MConst(xword, u64(sign_mask)); - code.pand(result, code.MConst(xword, f64_non_sign_mask)); + code.xorps(result, mask); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pxor(result, code.MConst(xword, f16_negative_zero)); - - ctx.reg_alloc.DefineValue(inst, result); + FPNeg<16>(code, ctx, inst); } void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pxor(result, code.MConst(xword, f32_negative_zero)); - - ctx.reg_alloc.DefineValue(inst, result); + FPNeg<32>(code, ctx, inst); } void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - - code.pxor(result, code.MConst(xword, f64_negative_zero)); - - 
ctx.reg_alloc.DefineValue(inst, result); + FPNeg<64>(code, ctx, inst); } void EmitX64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) { diff --git a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index 9df3e61f..aeba7e2b 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -557,37 +557,32 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam } // anonymous namespace -void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { +template<size_t fsize> +void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size<fsize>; + constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u); + constexpr u64 non_sign_mask64 = Common::Replicate<u64>(non_sign_mask, fsize); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x7FFF7FFF7FFF7FFF, 0x7FFF7FFF7FFF7FFF); - - code.pand(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); -} - -void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF); + const Xbyak::Address mask = code.MConst(xword, non_sign_mask64, non_sign_mask64); code.andps(a, mask); ctx.reg_alloc.DefineValue(inst, a); } +void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { + FPVectorAbs<16>(code, ctx, inst); +} + +void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) { + FPVectorAbs<32>(code, ctx, inst); +} + void EmitX64::EmitFPVectorAbs64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const 
Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF); - - code.andpd(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); + FPVectorAbs<64>(code, ctx, inst); } void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) { @@ -1229,37 +1224,32 @@ void EmitX64::EmitFPVectorMulX64(EmitContext& ctx, IR::Inst* inst) { EmitFPVectorMulX<64>(code, ctx, inst); } -void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) { +template<size_t fsize> +void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size<fsize>; + constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask; + constexpr u64 sign_mask64 = Common::Replicate<u64>(sign_mask, fsize); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x8000800080008000, 0x8000800080008000); + const Xbyak::Address mask = code.MConst(xword, sign_mask64, sign_mask64); - code.pxor(a, mask); + code.xorps(a, mask); ctx.reg_alloc.DefineValue(inst, a); } +void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) { + FPVectorNeg<16>(code, ctx, inst); +} + void EmitX64::EmitFPVectorNeg32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x8000000080000000, 0x8000000080000000); - - code.pxor(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); + FPVectorNeg<32>(code, ctx, inst); } void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Address mask = code.MConst(xword, 0x8000000000000000, 0x8000000000000000); - - code.pxor(a, mask); - - ctx.reg_alloc.DefineValue(inst, a); + FPVectorNeg<64>(code, ctx, inst); } void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {