From 87372917f9d0b484e1fc6abb0d7aec65b0567ec5 Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Sat, 8 Sep 2018 01:51:50 -0400
Subject: [PATCH] emit_x64_vector: Simplify "position == 0" case for EmitVectorExtractLower()

In the event position == 0, we can just treat it as a simple movq,
clearing the upper half of the XMM register. This also makes that case
use only one register.
---
 src/backend/x64/emit_x64_vector.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp
index 4b960b29..7069b7e5 100644
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -780,13 +780,16 @@ void EmitX64::EmitVectorExtractLower(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
     const u8 position = args[2].GetImmediateU8();
     ASSERT(position % 8 == 0);
 
-    code.punpcklqdq(xmm_a, xmm_b);
-    code.psrldq(xmm_a, position / 8);
+    if (position != 0) {
+        const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+
+        code.punpcklqdq(xmm_a, xmm_b);
+        code.psrldq(xmm_a, position / 8);
+    }
     code.movq(xmm_a, xmm_a);
 
     ctx.reg_alloc.DefineValue(inst, xmm_a);