diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 0145931f..fa12e0dd 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -1875,6 +1875,55 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { // Emits the per-lane signed absolute difference |x - y| for esize-bit lanes (8, 16 or 32).
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); // first operand; overwritten below and defined as the result
+    const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); // second operand; only ever used as a source below
+    const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); // receives the per-lane x > y comparison result
+    const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); // receives y - x
+    const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); // receives the copy of mask that pandn overwrites
+
+    code.movdqa(mask, x); // mask = x, because pcmpgt* compares into its destination operand
+    code.movdqa(tmp1, y); // tmp1 = y, so that y itself stays unmodified
+
+    switch (esize) { // NOTE(review): no default case, so an esize other than 8, 16 or 32 emits neither the compare nor the subtractions
+    case 8:
+        code.pcmpgtb(mask, y); // mask lane = all ones where x > y (signed compare), otherwise zero
+        code.psubb(tmp1, x); // tmp1 = y - x (wrapping)
+        code.psubb(x, y); // x = x - y (wrapping)
+        break;
+    case 16:
+        code.pcmpgtw(mask, y); // same as the 8-bit case, on 16-bit lanes
+        code.psubw(tmp1, x);
+        code.psubw(x, y);
+        break;
+    case 32:
+        code.pcmpgtd(mask, y); // same as the 8-bit case, on 32-bit lanes
+        code.psubd(tmp1, x);
+        code.psubd(x, y);
+        break;
+    }
+
+    code.movdqa(tmp2, mask); // tmp2 = mask
+    code.pand(x, mask); // keep x - y only in lanes where x > y
+    code.pandn(tmp2, tmp1); // tmp2 = ~mask & (y - x), i.e. keep y - x in the remaining lanes
+    code.por(x, tmp2); // merge both selections into x
+
+    ctx.reg_alloc.DefineValue(inst, x); // x is handed to the register allocator as the value of inst
+}
+
+void EmitX64::EmitVectorSignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { // forwards to the shared helper with esize = 8
+    EmitVectorSignedAbsoluteDifference(8, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorSignedAbsoluteDifference16(EmitContext& ctx, IR::Inst* inst) { // forwards to the shared helper with esize = 16
+    EmitVectorSignedAbsoluteDifference(16, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorSignedAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) { // forwards to the shared helper with esize = 32
+    EmitVectorSignedAbsoluteDifference(32, ctx, inst, code);
+}
+
 void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 962f7027..07fe0d55 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++
b/src/frontend/ir/ir_emitter.cpp
@@ -1225,6 +1225,19 @@ U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
     return {};
 }
 
+U128 IREmitter::VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) { // Emits the VectorSignedAbsoluteDifference opcode whose suffix matches esize, with a and b as operands.
+    switch (esize) {
+    case 8:
+        return Inst(Opcode::VectorSignedAbsoluteDifference8, a, b);
+    case 16:
+        return Inst(Opcode::VectorSignedAbsoluteDifference16, a, b);
+    case 32:
+        return Inst(Opcode::VectorSignedAbsoluteDifference32, a, b);
+    }
+    UNREACHABLE(); // reached only when esize is not 8, 16 or 32
+    return {}; // presumably kept so every path returns a value even if UNREACHABLE() is not treated as noreturn; confirm
+}
+
 U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index a9d91cdb..5af45a7a 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -240,6 +240,7 @@ public:
     U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
     U128 VectorShuffleWords(const U128& a, u8 mask);
     U128 VectorSignExtend(size_t original_esize, const U128& a);
+    U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b); // esize selects the 8, 16 or 32 opcode variant
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
     U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
     U128 VectorZeroExtend(size_t original_esize, const U128& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 0f442998..d0d20a29 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -332,6 +332,9 @@ OPCODE(VectorSignExtend8, T::U128, T::U128
 OPCODE(VectorSignExtend16, T::U128, T::U128 )
 OPCODE(VectorSignExtend32, T::U128, T::U128 )
 OPCODE(VectorSignExtend64, T::U128, T::U128 )
+OPCODE(VectorSignedAbsoluteDifference8, T::U128, T::U128, T::U128 )
+OPCODE(VectorSignedAbsoluteDifference16, T::U128, T::U128, T::U128 )
+OPCODE(VectorSignedAbsoluteDifference32, T::U128, T::U128, T::U128 )
 OPCODE(VectorSub8, T::U128, T::U128, T::U128 )
 OPCODE(VectorSub16, T::U128, T::U128, T::U128 )
 OPCODE(VectorSub32, T::U128, T::U128, T::U128 )