From 81e572c78cfa394c9762da96272b550013dafd31 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 7 Jul 2018 14:48:16 -0400 Subject: [PATCH] ir: Extend FPVectorAbs opcode to also handle 16-bit elements for FP16 --- src/backend_x64/emit_x64_vector_floating_point.cpp | 11 +++++++++++ src/frontend/ir/ir_emitter.cpp | 2 ++ src/frontend/ir/opcodes.inc | 1 + 3 files changed, 14 insertions(+) diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index 3b727044..0fa31698 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -195,6 +195,17 @@ void EmitX64::EmitFPVectorAbsoluteDifference64(EmitContext& ctx, IR::Inst* inst) ctx.reg_alloc.DefineValue(inst, a); } +void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Address mask = code.MConst(xword, 0x7FFF7FFF7FFF7FFF, 0x7FFF7FFF7FFF7FFF); + + code.pand(a, mask); + + ctx.reg_alloc.DefineValue(inst, a); +} + void EmitX64::EmitFPVectorAbs32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 2c6d0352..fd62570b 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1493,6 +1493,8 @@ U64 IREmitter::FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_con U128 IREmitter::FPVectorAbs(size_t esize, const U128& a) { switch (esize) { + case 16: + return Inst(Opcode::FPVectorAbs16, a); case 32: return Inst(Opcode::FPVectorAbs32, a); case 64: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 9d1f174c..2d9dac72 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -396,6 +396,7 @@ OPCODE(FPU32ToDouble, T::U64, T::U32, T::U OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 ) // Floating-point vector instructions +OPCODE(FPVectorAbs16, T::U128, T::U128 ) OPCODE(FPVectorAbs32, T::U128, T::U128 ) OPCODE(FPVectorAbs64, T::U128, T::U128 ) OPCODE(FPVectorAbsoluteDifference32, T::U128, T::U128, T::U128 )