From b22c5961f9ec38d8d5ca951ef64b1903f4bcb7f3 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 10 Feb 2018 11:05:22 +0000 Subject: [PATCH] IR: Implement VectorLogicalShiftRight --- src/backend_x64/emit_x64_vector.cpp | 53 +++++++++++++++++++++++++++++ src/frontend/ir/ir_emitter.cpp | 15 ++++++++ src/frontend/ir/ir_emitter.h | 1 + src/frontend/ir/opcodes.inc | 4 +++ 4 files changed, 73 insertions(+) diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index adbae27e..45974313 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -634,6 +634,59 @@ void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } +void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); + Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); + const u8 shift_amount = args[1].GetImmediateU8(); + + // TODO: Optimize + code.pcmpeqb(mask, mask); // mask = 0xFF + code.paddb(mask, mask); // mask = 0xFE + code.pxor(zeros, zeros); + for (size_t i = 0; i < shift_amount; ++i) { + code.pand(result, mask); + code.pavgb(result, zeros); + } + + ctx.reg_alloc.DefineValue(inst, result); +} + +void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const u8 shift_amount = args[1].GetImmediateU8(); + + code.psrlw(result, shift_amount); + + ctx.reg_alloc.DefineValue(inst, result); +} + +void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const u8 shift_amount = args[1].GetImmediateU8(); + + code.psrld(result, shift_amount); + + ctx.reg_alloc.DefineValue(inst, result); +} + +void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const u8 shift_amount = args[1].GetImmediateU8(); + + code.psrlq(result, shift_amount); + + ctx.reg_alloc.DefineValue(inst, result); +} + static void EmitVectorZeroExtend(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index eb0d0141..34837302 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -883,6 +883,21 @@ U128 IREmitter::VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amo return {}; } +U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount) { + switch (esize) { + case 8: + return Inst(Opcode::VectorLogicalShiftRight8, a, Imm8(shift_amount)); + case 16: + return Inst(Opcode::VectorLogicalShiftRight16, a, Imm8(shift_amount)); + case 32: + return Inst(Opcode::VectorLogicalShiftRight32, a, Imm8(shift_amount)); + case 64: + return Inst(Opcode::VectorLogicalShiftRight64, a, Imm8(shift_amount)); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::VectorNot(const U128& a) { return Inst(Opcode::VectorNot, a); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index c79f5f09..da536852 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -215,6 +215,7 @@ public: U128 VectorEqual(size_t esize, const U128& a, const U128& b); U128 VectorInterleaveLower(size_t esize, const U128& a, const U128& b); U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount); + U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount); U128 VectorNot(const U128& a); U128 VectorOr(const U128& a, const U128& b); U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 52590a1e..668a6bef 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -224,6 +224,10 @@ OPCODE(VectorLogicalShiftLeft8, T::U128, T::U128, T::U8 OPCODE(VectorLogicalShiftLeft16, T::U128, T::U128, T::U8 ) OPCODE(VectorLogicalShiftLeft32, T::U128, T::U128, T::U8 ) OPCODE(VectorLogicalShiftLeft64, T::U128, T::U128, T::U8 ) +OPCODE(VectorLogicalShiftRight8, T::U128, T::U128, T::U8 ) +OPCODE(VectorLogicalShiftRight16, T::U128, T::U128, T::U8 ) +OPCODE(VectorLogicalShiftRight32, T::U128, T::U128, T::U8 ) +OPCODE(VectorLogicalShiftRight64, T::U128, T::U128, T::U8 ) OPCODE(VectorNot, T::U128, T::U128 ) OPCODE(VectorOr, T::U128, T::U128, T::U128 ) OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )