From d4ee878cbde899215dc5567290a6780362596990 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 2 May 2018 16:04:02 -0400 Subject: [PATCH] emit_x64_vector: Use VPSRAQ in EmitVectorArithmeticShiftRight64() if AVX-512VL is available --- src/backend_x64/emit_x64_vector.cpp | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index 2c64995a..c7acfca1 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -385,20 +385,24 @@ void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); - Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const u8 shift_amount = std::min(args[1].GetImmediateU8(), u8(63)); - const u64 sign_bit = 0x80000000'00000000u >> shift_amount; + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) { + code.vpsraq(result, result, shift_amount); + } else { + const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); - code.pxor(tmp2, tmp2); - code.psrlq(result, shift_amount); - code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit)); - code.pand(tmp1, result); - code.psubq(tmp2, tmp1); - code.por(result, tmp2); + const u64 sign_bit = 0x80000000'00000000u >> shift_amount; + + code.pxor(tmp2, tmp2); + code.psrlq(result, shift_amount); + code.movdqa(tmp1, code.MConst(xword, sign_bit, sign_bit)); + code.pand(tmp1, result); + code.psubq(tmp2, tmp1); + code.por(result, tmp2); + } ctx.reg_alloc.DefineValue(inst, result); }