emit_x64_vector: Emit VPMULLQ in EmitVectorMultiply64 on AVX-512{DQ, VL} capable CPUs

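Shortens the code generated for the 64-bit vector multiply to a single instruction on these CPUs; other CPUs fall through to the existing paths (e.g. the SSE4.1 one).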
2018-05-14 12:11:50 -04:00 · 2018-05-14 12:11:50 -04:00 · 6de5ed96e5
commit 6de5ed96e5
parent 9054d1c20b
1 changed file with 11 additions and 0 deletions
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -1459,6 +1459,17 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) {
 }

 void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
+
+        code.vpmullq(a, a, b);
+
+        ctx.reg_alloc.DefineValue(inst, a);
+        return;
+    }
+
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);