diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp index 9dc5c618..12ac6fda 100644 --- a/src/backend_x64/emit_x64_floating_point.cpp +++ b/src/backend_x64/emit_x64_floating_point.cpp @@ -1074,6 +1074,19 @@ void EmitX64::EmitFPS64ToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } +void EmitX64::EmitFPS64ToSingle(EmitContext& ctx, IR::Inst* inst) { /* Emits x64 for the FPS64ToSingle IR op: converts the signed 64-bit integer held in a GPR (args[0]) to single precision with cvtsi2ss and defines the XMM result for inst. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const bool round_to_nearest = args[1].GetImmediateU1(); + ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); /* args[1] is the round_to_nearest flag; only false is supported here. */ + + code.cvtsi2ss(result, from); + + ctx.reg_alloc.DefineValue(inst, result); +} + void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1119,4 +1132,42 @@ void EmitX64::EmitFPU64ToDouble(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } + +void EmitX64::EmitFPU64ToSingle(EmitContext& ctx, IR::Inst* inst) { /* Emits x64 for the FPU64ToSingle IR op: unsigned 64-bit integer (args[0]) to single precision, using vcvtusi2ss when AVX-512F is reported and a cvtsi2ss-based fallback otherwise. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const bool round_to_nearest = args[1].GetImmediateU1(); + ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); + + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F)) { + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); + code.vcvtusi2ss(result, result, from); + } else { /* Fallback: cvtsi2ss interprets its source as signed, so inputs with the top bit set branch to the negative label below. */ + const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]); + code.pxor(result, result); + + Xbyak::Label negative; + Xbyak::Label end; + + code.test(from, from); + code.js(negative); + + code.cvtsi2ss(result, from); + code.jmp(end); + + code.L(negative); /* Top bit set: halve the value, OR the shifted-out low bit back in, convert, then double. */ + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); /* NOTE(review): requested after the js above; if ScratchGpr can emit spill code, that code would run only on this path - confirm against the register allocator. */ + code.mov(tmp, from); + code.shr(tmp, 1); + code.and_(from.cvt32(), 1); /* 32-bit AND keeps only bit 0; writing the 32-bit register also clears the upper half of from. */ + code.or_(from, tmp); + code.cvtsi2ss(result, from); + 
code.addss(result, result); /* result + result undoes the halving done before the conversion. */ + + code.L(end); + } + + ctx.reg_alloc.DefineValue(inst, result); +} } // namespace Dynarmic::BackendX64 diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 8b65994f..10a05fab 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1481,6 +1481,11 @@ U64 IREmitter::FPS64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_con return Inst(Opcode::FPS64ToDouble, a, Imm1(round_to_nearest)); } +U32 IREmitter::FPS64ToSingle(const U64& a, bool round_to_nearest, bool fpscr_controlled) { /* Builds an Opcode::FPS64ToSingle instruction from a and the round_to_nearest flag (passed as Imm1); fpscr_controlled must be true. */ + ASSERT(fpscr_controlled); + return Inst(Opcode::FPS64ToSingle, a, Imm1(round_to_nearest)); +} + U32 IREmitter::FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) { ASSERT(fpscr_controlled); return Inst(Opcode::FPU32ToSingle, a, Imm1(round_to_nearest)); @@ -1501,6 +1506,11 @@ U64 IREmitter::FPU64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_con return Inst(Opcode::FPU64ToDouble, a, Imm1(round_to_nearest)); } +U32 IREmitter::FPU64ToSingle(const U64& a, bool round_to_nearest, bool fpscr_controlled) { /* Builds an Opcode::FPU64ToSingle instruction from a and the round_to_nearest flag (passed as Imm1); fpscr_controlled must be true. */ + ASSERT(fpscr_controlled); + return Inst(Opcode::FPU64ToSingle, a, Imm1(round_to_nearest)); +} + U128 IREmitter::FPVectorAbs(size_t esize, const U128& a) { switch (esize) { case 16: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 139fb573..20702666 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -272,8 +272,10 @@ public: U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled); U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); U64 FPS64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_controlled); + U32 FPS64ToSingle(const U64& a, bool round_to_nearest, bool fpscr_controlled); U64 FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); U64 FPU64ToDouble(const U64& a, bool round_to_nearest, bool fpscr_controlled); + U32 
FPU64ToSingle(const U64& a, bool round_to_nearest, bool fpscr_controlled); U128 FPVectorAbs(size_t esize, const U128& a); U128 FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 3b52ac00..9b570638 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -394,8 +394,10 @@ OPCODE(FPU32ToSingle, T::U32, T::U32, T::U OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 ) OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 ) OPCODE(FPU64ToDouble, T::U64, T::U64, T::U1 ) +OPCODE(FPU64ToSingle, T::U32, T::U64, T::U1 ) OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 ) OPCODE(FPS64ToDouble, T::U64, T::U64, T::U1 ) +OPCODE(FPS64ToSingle, T::U32, T::U64, T::U1 ) // Floating-point vector instructions OPCODE(FPVectorAbs16, T::U128, T::U128 )