From 701f43d61ea21cec64d407b53cdc9ae071934810 Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Thu, 29 Mar 2018 16:11:14 -0400
Subject: [PATCH] IR: Add opcodes for interleaving upper-order bytes/halfwords/words/doublewords

I should have added this when I introduced the functions for
interleaving low-order equivalents for consistency in the interface.
---
 src/backend_x64/emit_x64_vector.cpp | 40 +++++++++++++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp | 15 +++++++++++
 src/frontend/ir/ir_emitter.h | 1 +
 src/frontend/ir/opcodes.inc | 4 +++
 4 files changed, 60 insertions(+)

diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 928421d6..bfc56f10 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -552,6 +552,46 @@ void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorInterleaveLower(code, ctx, inst, 64);
 }
 
+static void EmitVectorInterleaveUpper(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { // Shared helper for the four EmitVectorInterleaveUpperN entry points; size selects which punpckh* instruction is emitted.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); // a is the first operand of punpckh* below and is what gets defined as the result.
+    const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); // b is only passed as the second operand.
+
+    switch (size) { // NOTE(review): no default case; for any other size no instruction is emitted and a is still defined as the result.
+    case 8:
+        code.punpckhbw(a, b); // bytes
+        break;
+    case 16:
+        code.punpckhwd(a, b); // halfwords
+        break;
+    case 32:
+        code.punpckhdq(a, b); // words
+        break;
+    case 64:
+        code.punpckhqdq(a, b); // doublewords
+        break;
+    }
+
+    ctx.reg_alloc.DefineValue(inst, a); // The value of inst lives in register a.
+}
+
+void EmitX64::EmitVectorInterleaveUpper8(EmitContext& ctx, IR::Inst* inst) { // This and the three functions below forward to EmitVectorInterleaveUpper with size 8, 16, 32 and 64 respectively.
+    EmitVectorInterleaveUpper(code, ctx, inst, 8);
+}
+
+void EmitX64::EmitVectorInterleaveUpper16(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveUpper(code, ctx, inst, 16);
+}
+
+void EmitX64::EmitVectorInterleaveUpper32(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveUpper(code, ctx, inst, 32);
+}
+
+void EmitX64::EmitVectorInterleaveUpper64(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveUpper(code, ctx, inst, 64);
+}
+
 void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index d2b7c1e6..285e2aff 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -914,6 +914,21 @@ U128 IREmitter::VectorInterleaveLower(size_t esize, const U128& a, const U128& b
     return {};
 }
 
+U128 IREmitter::VectorInterleaveUpper(size_t esize, const U128& a, const U128& b) { // Builds the VectorInterleaveUpper8/16/32/64 instruction matching esize, with a and b as its operands.
+    switch (esize) {
+    case 8:
+        return Inst(Opcode::VectorInterleaveUpper8, a, b);
+    case 16:
+        return Inst(Opcode::VectorInterleaveUpper16, a, b);
+    case 32:
+        return Inst(Opcode::VectorInterleaveUpper32, a, b);
+    case 64:
+        return Inst(Opcode::VectorInterleaveUpper64, a, b);
+    }
+    UNREACHABLE(); // Reached only when esize is not 8, 16, 32 or 64.
+    return {}; // NOTE(review): presumably only here to give every path a return value - confirm.
+}
+
 U128 IREmitter::VectorLessEqualSigned(size_t esize, const U128& a, const U128& b) {
     return VectorNot(VectorGreaterSigned(esize, a, b));
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index be78b875..fbaed83e 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -220,6 +220,7 @@ public:
     U128 VectorGreaterSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorGreaterUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b);
+    U128 VectorInterleaveUpper(size_t esize, const U128& a, const U128& b); // esize must be 8, 16, 32 or 64; the definition hits UNREACHABLE() otherwise.
     U128 VectorLessEqualSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorLessEqualUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorLessSigned(size_t esize, const U128& a, const U128& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index b73e5e55..00c46534 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -242,6 +242,10 @@ OPCODE(VectorInterleaveLower8, T::U128, T::U128, T::U128
 OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )
 OPCODE(VectorInterleaveLower64, T::U128, T::U128, T::U128 )
+OPCODE(VectorInterleaveUpper8, T::U128, T::U128, T::U128 ) // NOTE(review): presumably the result type followed by the two operand types - confirm against the OPCODE macro.
+OPCODE(VectorInterleaveUpper16, T::U128, T::U128, T::U128 )
+OPCODE(VectorInterleaveUpper32, T::U128, T::U128, T::U128 )
+OPCODE(VectorInterleaveUpper64, T::U128, T::U128, T::U128 )
 OPCODE(VectorLogicalShiftLeft8, T::U128, T::U128, T::U8 )
 OPCODE(VectorLogicalShiftLeft16, T::U128, T::U128, T::U8 )
 OPCODE(VectorLogicalShiftLeft32, T::U128, T::U128, T::U8 )