From 0d20423ad57da26c9b3993e1043b2e28cc13a499 Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Wed, 25 Apr 2018 22:44:21 -0400
Subject: [PATCH] emit_x64_vector: Vectorize non-SSE4.1 fallback path for VectorMultiply32()

---
 src/backend_x64/emit_x64_vector.cpp | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 236345aa..184f080d 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -1117,9 +1117,21 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    EmitTwoArgumentFallback(code, ctx, inst, [](std::array& result, const std::array& a, const std::array& b){
-        std::transform(a.begin(), a.end(), b.begin(), result.begin(), std::multiplies<>());
-    });
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+
+    code.movdqa(tmp, a);
+    code.psrlq(a, 32);
+    code.pmuludq(tmp, b);
+    code.psrlq(b, 32);
+    code.pmuludq(a, b);
+    code.pshufd(tmp, tmp, 0b00001000);
+    code.pshufd(b, a, 0b00001000);
+    code.punpckldq(tmp, b);
+
+    ctx.reg_alloc.DefineValue(inst, tmp);
 }
 
 void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {