From 6de5ed96e50257272308227105b48000d177dbff Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 14 May 2018 12:11:50 -0400 Subject: [PATCH] emit_x64_vector: Emit VPMULLQ in EmitVectorMultiply64 on AVX-512{DQ, VL} capable CPUs Shortens code-gen down to a single instruction in the 64-bit path. --- src/backend_x64/emit_x64_vector.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index fa12e0dd..cb209197 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -1459,6 +1459,17 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); + + code.vpmullq(a, a, b); + + ctx.reg_alloc.DefineValue(inst, a); + return; + } + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);