From 70ff2d73b54dfff60dc433af73563d683deba576 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 15 Jul 2018 18:26:54 +0100
Subject: [PATCH] A64: Implement UADDLP

---
 src/backend_x64/emit_x64_vector.cpp           | 60 +++++++++++++++++++
 src/frontend/A64/decoder/a64.inc              |  2 +-
 .../translate/impl/simd_two_register_misc.cpp | 19 ++++++
 src/frontend/ir/ir_emitter.cpp                | 27 ++++++---
 src/frontend/ir/ir_emitter.h                  |  1 +
 src/frontend/ir/opcodes.inc                   |  3 +
 6 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index ce686728..953c8e84 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -1735,6 +1735,66 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(c, a);
+    code.psllq(a, 32);
+    code.psraq(c, 32);
+    code.psraq(a, 32);
+    code.paddq(a, c);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(c, a);
+    code.psllw(a, 8);
+    code.psrlw(c, 8);
+    code.psrlw(a, 8);
+    code.paddw(a, c);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(c, a);
+    code.pslld(a, 16);
+    code.psrld(c, 16);
+    code.psrld(a, 16);
+    code.paddd(a, c);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(c, a);
+    code.psllq(a, 32);
+    code.psrlq(c, 32);
+    code.psrlq(a, 32);
+    code.paddq(a, c);
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
 void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512_BITALG)) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index a75b4037..8fd2b49f 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -614,7 +614,7 @@ INST(FABS_2, "FABS (vector)", "0Q001
 //INST(FRECPE_3, "FRECPE", "0Q00111011111001110110nnnnnddddd")
 //INST(FRECPE_4, "FRECPE", "0Q0011101z100001110110nnnnnddddd")
 INST(REV32_asimd, "REV32 (vector)", "0Q101110zz100000000010nnnnnddddd")
-//INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd")
+INST(UADDLP, "UADDLP", "0Q101110zz100000001010nnnnnddddd")
 //INST(USQADD_2, "USQADD", "0Q101110zz100000001110nnnnnddddd")
 //INST(CLZ_asimd, "CLZ (vector)", "0Q101110zz100000010010nnnnnddddd")
 //INST(UADALP, "UADALP", "0Q101110zz100000011010nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
index 84977b6f..8e1f6678 100644
--- a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
@@ -365,6 +365,25 @@ bool TranslatorVisitor::REV64_asimd(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
+    if (size == 0b11) {
+        return ReservedValue();
+    }
+
+    const size_t esize = 8 << size.ZeroExtend();
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 operand = V(datasize, Vn);
+    IR::U128 result = ir.VectorPairedAddUnsignedWiden(esize, operand);
+
+    if (datasize == 64) {
+        result = ir.VectorZeroUpper(result);
+    }
+
+    V(datasize, Vd, result);
+    return true;
+}
+
 bool TranslatorVisitor::SCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
     return IntegerConvertToFloat(*this, Q, sz, Vn, Vd, Signedness::Signed);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index e3858a7a..780f5523 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1140,6 +1140,21 @@ U128 IREmitter::VectorOr(const U128& a, const U128& b) {
     return Inst(Opcode::VectorOr, a, b);
 }
 
+U128 IREmitter::VectorPairedAdd(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst(Opcode::VectorPairedAdd8, a, b);
+    case 16:
+        return Inst(Opcode::VectorPairedAdd16, a, b);
+    case 32:
+        return Inst(Opcode::VectorPairedAdd32, a, b);
+    case 64:
+        return Inst(Opcode::VectorPairedAdd64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorPairedAddLower(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
@@ -1153,16 +1168,14 @@ U128 IREmitter::VectorPairedAddLower(size_t esize, const U128& a, const U128& b)
     return {};
 }
 
-U128 IREmitter::VectorPairedAdd(size_t esize, const U128& a, const U128& b) {
-    switch (esize) {
+U128 IREmitter::VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a) {
+    switch (original_esize) {
     case 8:
-        return Inst(Opcode::VectorPairedAdd8, a, b);
+        return Inst(Opcode::VectorPairedAddUnsignedWiden8, a);
     case 16:
-        return Inst(Opcode::VectorPairedAdd16, a, b);
+        return Inst(Opcode::VectorPairedAddUnsignedWiden16, a);
     case 32:
-        return Inst(Opcode::VectorPairedAdd32, a, b);
-    case 64:
-        return Inst(Opcode::VectorPairedAdd64, a, b);
+        return Inst(Opcode::VectorPairedAddUnsignedWiden32, a);
     }
     UNREACHABLE();
     return {};
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index c783c164..0fb40827 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -236,6 +236,7 @@ public:
     U128 VectorOr(const U128& a, const U128& b);
     U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
     U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
+    U128 VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a);
     U128 VectorPopulationCount(const U128& a);
     U128 VectorReverseBits(const U128& a);
     U128 VectorRotateLeft(size_t esize, const U128& a, u8 amount);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index c4b5e165..37a8d262 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -319,6 +319,9 @@ OPCODE(VectorOr, T::U128, T::U128, T::U
 OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAddLower16, T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAddLower32, T::U128, T::U128, T::U128 )
+OPCODE(VectorPairedAddUnsignedWiden8, T::U128, T::U128 )
+OPCODE(VectorPairedAddUnsignedWiden16, T::U128, T::U128 )
+OPCODE(VectorPairedAddUnsignedWiden32, T::U128, T::U128 )
 OPCODE(VectorPairedAdd8, T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
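
Note on semantics: UADDLP adds each pair of adjacent unsigned elements of the
source vector and stores the sums as elements of twice the width, halving the
element count. Q selects a 64-bit or 128-bit datasize, and size == 0b11 is
reserved because the widened destination lanes would have to be 128 bits wide.
A minimal reference model of these semantics follows; uaddlp_ref is a
hypothetical name for illustration, not dynarmic code:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Each destination lane is the sum of two adjacent source lanes,
    // zero-extended to twice the width, so the sum can never wrap.
    template <typename Narrow, typename Wide>
    std::vector<Wide> uaddlp_ref(const std::vector<Narrow>& src) {
        std::vector<Wide> dst(src.size() / 2);
        for (std::size_t i = 0; i < dst.size(); ++i) {
            dst[i] = static_cast<Wide>(src[2 * i]) + static_cast<Wide>(src[2 * i + 1]);
        }
        return dst;
    }

    int main() {
        // UADDLP Vd.4H, Vn.8B on one 64-bit half: 0xFF + 0x01 would wrap to
        // 0x00 at 8 bits; the widened 16-bit lane keeps the full 0x0100.
        const std::vector<std::uint8_t> src{0xFF, 0x01, 0x10, 0x20, 0x00, 0x00, 0x7F, 0x7F};
        const auto dst = uaddlp_ref<std::uint8_t, std::uint16_t>(src);
        for (const auto lane : dst) {
            std::printf("%04x ", lane);
        }
        std::printf("\n");  // prints: 0100 0030 0000 00fe
    }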
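Note on the backend: the four x64 emitters all use the same shift trick. Copy
the input; zero-extend the even (low) element of each double-width lane with a
left shift followed by a logical right shift; zero-extend the odd (high)
element with a logical right shift alone; then add the two copies. The signed
variant substitutes arithmetic right shifts to sign-extend instead; note that
an arithmetic shift of packed quadwords exists only as the AVX-512 VPSRAQ,
since SSE2 has no psraq encoding. Below is a scalar sketch of the unsigned
8-bit case, with a uint64_t standing in for half an XMM register;
pairwise_widen_u8 is an illustrative name and masks play the role of the
per-word shifts:

    #include <cstdint>
    #include <cstdio>

    // View v as eight u8 lanes; produce four u16 lanes, each the sum of an
    // adjacent pair. Each pair sums to at most 0x1FE, so the plain 64-bit
    // add below cannot carry across a 16-bit lane boundary.
    std::uint64_t pairwise_widen_u8(std::uint64_t v) {
        const std::uint64_t even = v & 0x00FF00FF00FF00FFull;        // low byte of each word
        const std::uint64_t odd = (v >> 8) & 0x00FF00FF00FF00FFull;  // high byte of each word
        return even + odd;                                           // one u16 sum per word
    }

    int main() {
        // Byte lanes 0x01..0x08 pair up to 0x0003, 0x0007, 0x000b, 0x000f.
        std::printf("%016llx\n",
                    static_cast<unsigned long long>(pairwise_widen_u8(0x0807060504030201ull)));
    }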