From 135107279d13ab0a1b84b1b076a94541103772c1 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 31 Aug 2018 09:07:05 -0400 Subject: [PATCH] emit_x64_vector: Simplify EmitVectorLogicalShiftRight8() We can generate the mask and AND it against the result of a halfword shift instead of looping. --- src/backend/x64/emit_x64_vector.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp index 06847c40..6a771069 100644 --- a/src/backend/x64/emit_x64_vector.cpp +++ b/src/backend/x64/emit_x64_vector.cpp @@ -1102,18 +1102,15 @@ void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); - Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const u8 shift_amount = args[1].GetImmediateU8(); - // TODO: Optimize - code.pcmpeqb(mask, mask); // mask = 0xFF - code.paddb(mask, mask); // mask = 0xFE - code.pxor(zeros, zeros); - for (size_t i = 0; i < shift_amount; ++i) { - code.pand(result, mask); - code.pavgb(result, zeros); + if (shift_amount > 0) { + const u64 replicand = 0xFEULL >> shift_amount; + const u64 mask = Common::Replicate(replicand, Common::BitSize<u8>()); + + code.psrlw(result, shift_amount); + code.pand(result, code.MConst(xword, mask, mask)); } ctx.reg_alloc.DefineValue(inst, result);