From b47adaee1dea956c6e1985fec77d8c334587a595 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 1 Jun 2020 15:41:29 +0100 Subject: [PATCH] emit_x64_vector: SSSE3 implementation of EmitVectorExtract --- src/backend/x64/emit_x64_vector.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp index e7b83066..f4c326eb 100644 --- a/src/backend/x64/emit_x64_vector.cpp +++ b/src/backend/x64/emit_x64_vector.cpp @@ -1041,19 +1041,30 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const u8 position = args[2].GetImmediateU8(); ASSERT(position % 8 == 0); - if (position != 0) { + if (position == 0) { + ctx.reg_alloc.DefineValue(inst, args[0]); + return; + } + + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); - code.psrldq(xmm_a, position / 8); - code.pslldq(xmm_b, (128 - position) / 8); - code.por(xmm_a, xmm_b); + code.palignr(xmm_b, xmm_a, position / 8); + ctx.reg_alloc.DefineValue(inst, xmm_b); + return; } + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); + + code.psrldq(xmm_a, position / 8); + code.pslldq(xmm_b, (128 - position) / 8); + code.por(xmm_a, xmm_b); + ctx.reg_alloc.DefineValue(inst, xmm_a); }