diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 7b074fa0..d87f6f82 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -1055,6 +1055,19 @@ void EmitX64::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, to);
 }
 
+void EmitX64::EmitFPS64ToDouble(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the FPS64ToDouble IR opcode: signed 64-bit integer -> double.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    // args[0] is the integer to convert (requested in a GPR); args[1] is the round_to_nearest immediate flag.
+    const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const bool round_to_nearest = args[1].GetImmediateU1();
+    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
+    // cvtsi2sd with a 64-bit source performs a signed conversion; inexact results round per the current MXCSR rounding control.
+    code.cvtsi2sd(result, from);
+    // Bind the XMM register holding the converted value as this instruction's result.
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
@@ -1069,4 +1082,26 @@ void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, to);
 }
 
+void EmitX64::EmitFPU64ToDouble(EmitContext& ctx, IR::Inst* inst) { // Emits x64 code for the FPU64ToDouble IR opcode: unsigned 64-bit integer -> double.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    // args[0] is the integer to convert (requested in a GPR); args[1] is the round_to_nearest immediate flag.
+    const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const bool round_to_nearest = args[1].GetImmediateU1();
+    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
+    // With AVX-512F a single unsigned-convert instruction is used; otherwise the double is assembled from the two 32-bit halves.
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)) {
+        code.vcvtusi2sd(result, result, from); // the second operand only supplies the upper lane of the destination; the converted value lands in the low double
+    } else {
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        // Fallback uses the 2^52 / 2^84 bias trick: 0x43300000 and 0x45300000 are the high words of the doubles 2^52 and 2^84 (assumes MConst's first value is the low qword -- TODO confirm).
+        code.movq(tmp, from); // low qword = from, high qword zeroed
+        code.punpckldq(tmp, code.MConst(xword, 0x4530000043300000, 0)); // interleave: low double = 2^52 + lo32(from), high double = 2^84 + hi32(from) * 2^32
+        code.subpd(tmp, code.MConst(xword, 0x4330000000000000, 0x4530000000000000)); // remove the biases (exact): low = lo32(from), high = hi32(from) * 2^32
+        code.pshufd(result, tmp, 0b01001110); // result = tmp with its two qwords swapped
+        code.addpd(result, tmp); // both lanes now hold hi + lo; this add is the only step that can round
+    } // NOTE(review): if MXCSR is set to round toward -infinity, the x - x in subpd yields -0.0 for from == 0, so the result would be -0.0 -- confirm whether the sign must be cleared
+    // Bind the XMM register holding the converted value as this instruction's result.
+    ctx.reg_alloc.DefineValue(inst, result);
+}
 } // namespace Dynarmic::BackendX64 diff --git 
a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index fd62570b..8b65994f 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1476,6 +1476,11 @@ U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_con
     return Inst(Opcode::FPS32ToSingle, a, Imm1(round_to_nearest));
 }
 
+U64 IREmitter::FPS64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_controlled) { // Builds an FPS64ToDouble IR instruction taking the 64-bit value `a`; returns its U64 result.
+    ASSERT(fpscr_controlled); // only fpscr_controlled == true is accepted; the flag is not encoded in the instruction
+    return Inst(Opcode::FPS64ToDouble, a, Imm1(round_to_nearest)); // round_to_nearest travels as a 1-bit immediate operand
+}
+
 U32 IREmitter::FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {
     ASSERT(fpscr_controlled);
     return Inst(Opcode::FPU32ToSingle, a, Imm1(round_to_nearest));
@@ -1491,6 +1496,11 @@ U64 IREmitter::FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_con
     return Inst(Opcode::FPU32ToDouble, a, Imm1(round_to_nearest));
 }
 
+U64 IREmitter::FPU64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_controlled) { // Builds an FPU64ToDouble IR instruction taking the 64-bit value `a`; returns its U64 result.
+    ASSERT(fpscr_controlled); // only fpscr_controlled == true is accepted; the flag is not encoded in the instruction
+    return Inst(Opcode::FPU64ToDouble, a, Imm1(round_to_nearest)); // round_to_nearest travels as a 1-bit immediate operand
+}
+
 U128 IREmitter::FPVectorAbs(size_t esize, const U128& a) {
     switch (esize) {
     case 16:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 1559c5a6..139fb573 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -271,7 +271,9 @@ public:
     U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
+    U64 FPS64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_controlled); // 64-bit-source counterpart of FPS32ToDouble (presumably signed integer -> double, per the S/U naming -- confirm)
     U64 FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
+    U64 FPU64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_controlled); // 64-bit-source counterpart of FPU32ToDouble (presumably unsigned integer -> double, per the S/U naming -- confirm)
 
     U128 FPVectorAbs(size_t esize, const U128& a);
     U128 FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b);
diff --git 
a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 2d9dac72..3b52ac00 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -393,7 +393,9 @@ OPCODE(FPDoubleToS32, T::U32, T::U64, T::U
 OPCODE(FPU32ToSingle, T::U32, T::U32, T::U1 )
 OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 )
 OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 )
+OPCODE(FPU64ToDouble, T::U64, T::U64, T::U1 ) // 64-bit-source variant of FPU32ToDouble; presumably columns are result type, integer operand, 1-bit round_to_nearest flag -- confirm against the OPCODE macro
 OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 )
+OPCODE(FPS64ToDouble, T::U64, T::U64, T::U1 ) // 64-bit-source variant of FPS32ToDouble; same column layout as the entry above
 
 // Floating-point vector instructions OPCODE(FPVectorAbs16, T::U128, T::U128 )