# NOTE(review): this patch had been flattened onto one physical line; the
# records below are re-split so that every hunk body matches the line counts
# in its "@@" header. Leading indentation inside the hunks could not be
# recovered and is reconstructed as four spaces per level -- confirm against
# the target tree, or apply with whitespace-insensitive matching.
#
# emit_x64_vector_floating_point.cpp: adds EmitFPVectorAbs32 and
# EmitFPVectorAbs64. Each one takes operand 0 of `inst` in a scratch XMM
# register, ANDs it (andps / andpd) with a constant whose 32-bit lanes are all
# 0x7FFFFFFF (respectively whose 64-bit lanes are all 0x7FFFFFFFFFFFFFFF),
# i.e. every bit except the top bit of each lane, and then defines the value
# of `inst` as that register. Presumably code.MConst(xword, lo, hi) yields a
# 128-bit memory constant built from the two 64-bit halves -- confirm against
# the block-of-code helper.
diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index 10210109..3b727044 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -195,6 +195,28 @@ void EmitX64::EmitFPVectorAbsoluteDifference64(EmitContext& ctx, IR::Inst* inst)
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF);
+
+    code.andps(a, mask);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+void EmitX64::EmitFPVectorAbs64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Address mask = code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF);
+
+    code.andpd(a, mask);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
 void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::addps);
 }
# ir_emitter.cpp: adds IREmitter::FPVectorAbs, which selects
# Opcode::FPVectorAbs32 for esize == 32 and Opcode::FPVectorAbs64 for
# esize == 64; any other esize falls out of the switch into UNREACHABLE().
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 50a9edcc..2c6d0352 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1491,6 +1491,17 @@ U64 IREmitter::FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_con
     return Inst(Opcode::FPU32ToDouble, a, Imm1(round_to_nearest));
 }
 
+U128 IREmitter::FPVectorAbs(size_t esize, const U128& a) {
+    switch (esize) {
+    case 32:
+        return Inst(Opcode::FPVectorAbs32, a);
+    case 64:
+        return Inst(Opcode::FPVectorAbs64, a);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 32:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 
a67da3fe..1559c5a6 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -273,6 +273,7 @@ public:
     U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U64 FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
 
+    U128 FPVectorAbs(size_t esize, const U128& a);
     U128 FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b);
     U128 FPVectorAdd(size_t esize, const U128& a, const U128& b);
     U128 FPVectorDiv(size_t esize, const U128& a, const U128& b);
# NOTE(review): these hunks had been flattened onto one physical line; the
# records are re-split so that each hunk body matches the line counts in its
# "@@" header. Indentation in the ir_emitter.h hunk above is reconstructed as
# four spaces, and the column alignment of the OPCODE(...) table below could
# not be recovered -- confirm against the target tree, or apply with
# whitespace-insensitive matching.
#
# ir_emitter.h (above): declares IREmitter::FPVectorAbs(size_t esize,
# const U128& a), returning U128.
# opcodes.inc (below): registers FPVectorAbs32 and FPVectorAbs64, each listed
# with a T::U128 result type and a single T::U128 operand.
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 4532b4ce..9d1f174c 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -396,6 +396,8 @@ OPCODE(FPU32ToDouble, T::U64, T::U32, T::U
 OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 )
 
 // Floating-point vector instructions
+OPCODE(FPVectorAbs32, T::U128, T::U128 )
+OPCODE(FPVectorAbs64, T::U128, T::U128 )
 OPCODE(FPVectorAbsoluteDifference32, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorAbsoluteDifference64, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorAdd32, T::U128, T::U128, T::U128 )