diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index d0400c6b..10210109 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -173,6 +173,28 @@ static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.DefineValue(inst, result); } +void EmitX64::EmitFPVectorAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) { /* Emits x64 code for the 32-bit-lane absolute difference: a packed single-precision subtract of args[1] from args[0], followed by an AND that clears bit 31 (the sign bit) of every 32-bit lane. NOTE(review): unlike EmitFPVectorAdd32 this does not go through EmitVectorOperation32 - confirm that any NaN handling done there is not needed here. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); /* a is overwritten by the instructions below and is what gets defined as the result */ + const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); /* b is only read */ + + code.subps(a, b); /* a = a - b, per 32-bit lane */ + code.andps(a, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)); /* mask keeps every bit except bit 31 of each lane; presumably the two arguments are the low and high 64-bit halves of a 128-bit constant - confirm against MConst */ + + ctx.reg_alloc.DefineValue(inst, a); +} + +void EmitX64::EmitFPVectorAbsoluteDifference64(EmitContext& ctx, IR::Inst* inst) { /* 64-bit-lane counterpart of EmitFPVectorAbsoluteDifference32: packed double-precision subtract, then clear bit 63 (the sign bit) of every 64-bit lane. The same NOTE(review) about bypassing the shared vector-operation helper applies. */ + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); /* a is overwritten by the instructions below and is what gets defined as the result */ + const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); /* b is only read */ + + code.subpd(a, b); /* a = a - b, per 64-bit lane */ + code.andpd(a, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF)); /* mask keeps every bit except bit 63 of each lane */ + + ctx.reg_alloc.DefineValue(inst, a); +} + void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) { EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::addps); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index a6047737..50a9edcc 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1491,6 +1491,17 @@ U64 IREmitter::FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_con return Inst(Opcode::FPU32ToDouble, a, Imm1(round_to_nearest)); } +U128 IREmitter::FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b) { /* Builds the IR instruction for a vector absolute difference of a and b, choosing the 32 or 64 opcode variant from esize. */ + switch (esize) { + case 32: + return Inst(Opcode::FPVectorAbsoluteDifference32, a, b); + case 64: + return Inst(Opcode::FPVectorAbsoluteDifference64, a, b); + } + UNREACHABLE(); /* reached only when esize is neither 32 nor 64 */ + return {}; /* follows UNREACHABLE(); presumably present only so every path returns a value */ +} + U128 
IREmitter::FPVectorAdd(size_t esize, const U128& a, const U128& b) { switch (esize) { case 32: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 43e5895e..a67da3fe 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -273,6 +273,7 @@ public: U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); U64 FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); + U128 FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b); /* Emitter entry point for the FPVectorAbsoluteDifference32/64 opcodes; esize selects the variant, and the definition in ir_emitter.cpp accepts only 32 and 64. */ U128 FPVectorAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorDiv(size_t esize, const U128& a, const U128& b); U128 FPVectorEqual(size_t esize, const U128& a, const U128& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 72ba3a16..4532b4ce 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -396,6 +396,8 @@ OPCODE(FPU32ToDouble, T::U64, T::U32, T::U OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 ) // Floating-point vector instructions +OPCODE(FPVectorAbsoluteDifference32, T::U128, T::U128, T::U128 ) +OPCODE(FPVectorAbsoluteDifference64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorAdd32, T::U128, T::U128, T::U128 ) OPCODE(FPVectorAdd64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorDiv32, T::U128, T::U128, T::U128 )