emit_x64_{vector_}floating_point: Centralize implementation of FP{Vector}{Abs,Neg}

Removes the dependency on constants such as `f16_negative_zero` and `f32_non_sign_mask`, which were defined at the top of some files, in favor of the `FPInfo` trait-type. Also avoids bypass delays by selecting between instructions such as `pand`, `andps`, or `andpd` depending on the operand type, so that each operation stays in its respective uop domain. See https://www.agner.org/optimize/instruction_tables.pdf for more info on bypass delays.
2021-06-09 09:44:55 -07:00 · 2021-06-09 09:44:55 -07:00 · 776208742b
commit 776208742b
parent 759459e181
2 changed files with 55 additions and 70 deletions
--- a/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_floating_point.cpp
@@ -39,9 +39,6 @@ namespace {
 const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1);
 constexpr u64 f16_negative_zero = 0x8000;
 constexpr u64 f16_non_sign_mask = 0x7fff;
 constexpr u64 f32_negative_zero = 0x80000000u;
 constexpr u64 f32_nan = 0x7fc00000u;
 constexpr u64 f32_non_sign_mask = 0x7fffffffu;
@@ -328,58 +325,56 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
 }  // anonymous namespace
-void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
+template<size_t fsize>
 void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    using FPT = mp::unsigned_integer_of_size<fsize>;
    constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Address mask = code.MConst(xword, non_sign_mask);
-    code.pand(result, code.MConst(xword, f16_non_sign_mask));
+    code.andps(result, mask);
    ctx.reg_alloc.DefineValue(inst, result);
 }
 void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
    FPAbs<16>(code, ctx, inst);
 }
 void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    FPAbs<32>(code, ctx, inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    code.pand(result, code.MConst(xword, f32_non_sign_mask));
    ctx.reg_alloc.DefineValue(inst, result);
 }
 void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
    FPAbs<64>(code, ctx, inst);
 }
 template<size_t fsize>
 void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    using FPT = mp::unsigned_integer_of_size<fsize>;
    constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Address mask = code.MConst(xword, u64(sign_mask));
-    code.pand(result, code.MConst(xword, f64_non_sign_mask));
+    code.xorps(result, mask);
    ctx.reg_alloc.DefineValue(inst, result);
 }
 void EmitX64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    FPNeg<16>(code, ctx, inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    code.pxor(result, code.MConst(xword, f16_negative_zero));
    ctx.reg_alloc.DefineValue(inst, result);
 }
 void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    FPNeg<32>(code, ctx, inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    code.pxor(result, code.MConst(xword, f32_negative_zero));
    ctx.reg_alloc.DefineValue(inst, result);
 }
 void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    FPNeg<64>(code, ctx, inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    code.pxor(result, code.MConst(xword, f64_negative_zero));
    ctx.reg_alloc.DefineValue(inst, result);
 }
 void EmitX64::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst) {
--- a/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
@@ -557,37 +557,32 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
 }  // anonymous namespace
-void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
+template<size_t fsize>
 void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    using FPT = mp::unsigned_integer_of_size<fsize>;
    constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);
    constexpr u64 non_sign_mask64 = Common::Replicate<u64>(non_sign_mask, fsize);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Address mask = code.MConst(xword, 0x7FFF7FFF7FFF7FFF, 0x7FFF7FFF7FFF7FFF);
+    const Xbyak::Address mask = code.MConst(xword, non_sign_mask64, non_sign_mask64);
    code.pand(a, mask);
    ctx.reg_alloc.DefineValue(inst, a);
 }
 void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF);
    code.andps(a, mask);
    ctx.reg_alloc.DefineValue(inst, a);
 }
 void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
    FPVectorAbs<16>(code, ctx, inst);
 }
 void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) {
    FPVectorAbs<32>(code, ctx, inst);
 }
 void EmitX64::EmitFPVectorAbs64(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    FPVectorAbs<64>(code, ctx, inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF);
    code.andpd(a, mask);
    ctx.reg_alloc.DefineValue(inst, a);
 }
 void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
@@ -1229,37 +1224,32 @@ void EmitX64::EmitFPVectorMulX64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPVectorMulX<64>(code, ctx, inst);
 }
-void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
+template<size_t fsize>
 void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
    using FPT = mp::unsigned_integer_of_size<fsize>;
    constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;
    constexpr u64 sign_mask64 = Common::Replicate<u64>(sign_mask, fsize);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Address mask = code.MConst(xword, 0x8000800080008000, 0x8000800080008000);
+    const Xbyak::Address mask = code.MConst(xword, sign_mask64, sign_mask64);
-    code.pxor(a, mask);
+    code.xorps(a, mask);
    ctx.reg_alloc.DefineValue(inst, a);
 }
 void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
    FPVectorNeg<16>(code, ctx, inst);
 }
 void EmitX64::EmitFPVectorNeg32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    FPVectorNeg<32>(code, ctx, inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Address mask = code.MConst(xword, 0x8000000080000000, 0x8000000080000000);
    code.pxor(a, mask);
    ctx.reg_alloc.DefineValue(inst, a);
 }
 void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    FPVectorNeg<64>(code, ctx, inst);
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Address mask = code.MConst(xword, 0x8000000000000000, 0x8000000000000000);
    code.pxor(a, mask);
    ctx.reg_alloc.DefineValue(inst, a);
 }
 void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {