From d70ee7c0d183f9f4bd85ca971848b850c6c9cb74 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 26 Apr 2018 13:07:10 -0400 Subject: [PATCH] emit_x64_vector: Use VPBROADCAST where applicable and available Uses the instruction that does what it says in its name if available. Allows avoiding the use of a scratch register in EmitVectorBroadcast8() and EmitVectorBroadcastLower8()'s SSSE3 path. --- src/backend_x64/emit_x64_vector.cpp | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index 3bbf9f46..236345aa 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -407,7 +407,10 @@ void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { + code.vpbroadcastb(a, a); + code.movq(a, a); + } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.pxor(tmp, tmp); @@ -446,7 +449,9 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { + code.vpbroadcastb(a, a); + } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) { Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.pxor(tmp, tmp); @@ -465,8 +470,12 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code.pshuflw(a, a, 0); - code.punpcklqdq(a, a); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { + code.vpbroadcastw(a, a); + } else { + code.pshuflw(a, a, 0); + code.punpcklqdq(a, a); + } ctx.reg_alloc.DefineValue(inst, a); } @@ -476,7 +485,11 @@ void 
EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code.pshufd(a, a, 0); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { + code.vpbroadcastd(a, a); + } else { + code.pshufd(a, a, 0); + } ctx.reg_alloc.DefineValue(inst, a); } @@ -486,7 +499,11 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - code.punpcklqdq(a, a); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { + code.vpbroadcastq(a, a); + } else { + code.punpcklqdq(a, a); + } ctx.reg_alloc.DefineValue(inst, a); }