emit_x64_vector: Emit VPMULLQ in EmitVectorMultiply64 on AVX-512{DQ, VL} capable CPUs

Shortens code-gen down to a single instruction in the 64-bit path.
This commit is contained in:
Lioncash 2018-05-14 12:11:50 -04:00 committed by MerryMage
parent 9054d1c20b
commit 6de5ed96e5

View file

@ -1459,6 +1459,17 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
code.vpmullq(a, a, b);
ctx.reg_alloc.DefineValue(inst, a);
return;
}
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);