From 39593fcd2622666b06d70eaf1490f87a88367114 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 29 Aug 2018 14:34:01 -0400 Subject: [PATCH] emit_x64_vector: Remove fallback for EmitVectorSignExtend32() We can just do the extension manually, which gets rid of the need to fall back here. --- src/backend/x64/emit_x64_vector.cpp | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp index 2f8aa317..5eaf8f1f 100644 --- a/src/backend/x64/emit_x64_vector.cpp +++ b/src/backend/x64/emit_x64_vector.cpp @@ -2465,19 +2465,30 @@ void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); code.pmovsxdq(a, a); - ctx.reg_alloc.DefineValue(inst, a); - return; + } else { + const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 gpr_tmp1 = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 gpr_tmp2 = ctx.reg_alloc.ScratchGpr(); + + code.movd(gpr_tmp1.cvt32(), a); + code.movsxd(gpr_tmp1, gpr_tmp1.cvt32()); + + code.pshufd(a, a, 0b11100101); + + code.movd(gpr_tmp2.cvt32(), a); + code.movsxd(gpr_tmp2, gpr_tmp2.cvt32()); + + code.movq(a, gpr_tmp1); + code.movq(xmm_tmp, gpr_tmp2); + code.punpcklqdq(a, xmm_tmp); } - EmitOneArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a) { - for (size_t i = 0; i < result.size(); ++i) { - result[i] = Common::SignExtend<32, u64>(a[i]); - } - }); + ctx.reg_alloc.DefineValue(inst, a); } void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) {