diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index e6659ad2..18136087 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -1138,6 +1138,47 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+// Selects which immediate-controlled shuffle instruction VectorShuffleImpl emits.
+enum class ShuffleType {
+    LowHalfwords,
+    HighHalfwords,
+    Words
+};
+
+// Emits one shuffle instruction chosen by `type`: args[0] is the vector operand,
+// args[1] must be an immediate u8 used as the shuffle control byte.
+static void VectorShuffleImpl(ShuffleType type, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const u8 mask = args[1].GetImmediateU8();
+
+    if (type == ShuffleType::LowHalfwords) {
+        code.pshuflw(result, operand, mask);
+    } else if (type == ShuffleType::HighHalfwords) {
+        code.pshufhw(result, operand, mask);
+    } else {
+        // pshufd, not pshufw: pshufw is the MMX-register shuffle of 16-bit words
+        // and has no XMM form; pshufd shuffles the 32-bit elements of an XMM.
+        code.pshufd(result, operand, mask);
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitX64::EmitVectorShuffleHighHalfwords(EmitContext& ctx, IR::Inst* inst) {
+    VectorShuffleImpl(ShuffleType::HighHalfwords, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorShuffleLowHalfwords(EmitContext& ctx, IR::Inst* inst) {
+    VectorShuffleImpl(ShuffleType::LowHalfwords, ctx, inst, code);
+}
+
+void EmitX64::EmitVectorShuffleWords(EmitContext& ctx, IR::Inst* inst) {
+    VectorShuffleImpl(ShuffleType::Words, ctx, inst, code);
+}
+
 void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 92cbe141..d2b7c1e6 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1103,6 +1103,18 @@ U128 IREmitter::VectorPopulationCount(const U128& a) {
     return Inst(Opcode::VectorPopulationCount, a);
 }
 
+U128 IREmitter::VectorShuffleHighHalfwords(const U128& a, u8 mask) {
+    return Inst(Opcode::VectorShuffleHighHalfwords, a, mask);
+}
+
+U128 IREmitter::VectorShuffleLowHalfwords(const U128& a, u8 mask) {
+    return Inst(Opcode::VectorShuffleLowHalfwords, a, mask);
+}
+
+U128 IREmitter::VectorShuffleWords(const U128& a, u8 mask) {
+    return Inst(Opcode::VectorShuffleWords, a, mask);
+}
+
 U128 IREmitter::VectorSignExtend(size_t original_esize, const U128& a) {
     switch (original_esize) {
     case 8:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 5adb708e..be78b875 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -238,6 +238,9 @@ public:
     U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
     U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
     U128 VectorPopulationCount(const U128& a);
+    U128 VectorShuffleHighHalfwords(const U128& a, u8 mask);
+    U128 VectorShuffleLowHalfwords(const U128& a, u8 mask);
+    U128 VectorShuffleWords(const U128& a, u8 mask);
     U128 VectorSignExtend(size_t original_esize, const U128& a);
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
     U128 VectorZeroExtend(size_t original_esize, const U128& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 652eca56..b73e5e55 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -287,6 +287,9 @@ OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128
 OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
 OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
 OPCODE(VectorPopulationCount, T::U128, T::U128 )
+OPCODE(VectorShuffleHighHalfwords, T::U128, T::U128, T::U8 )
+OPCODE(VectorShuffleLowHalfwords, T::U128, T::U128, T::U8 )
+OPCODE(VectorShuffleWords, T::U128, T::U128, T::U8 )
 OPCODE(VectorSignExtend8, T::U128, T::U128 )
 OPCODE(VectorSignExtend16, T::U128, T::U128 )
 OPCODE(VectorSignExtend32, T::U128, T::U128 )