Add EmitOneArgumentFallback and use it for EmitVectorPopulationCount.

EmitOneArgumentFallback emits a host call to a C++ fallback whose signature
is (result&, const operand&). It converts the lambda to a function pointer,
reserves 2 * 16 bytes of stack plus ABI_SHADOW_SPACE, passes the addresses of
slot 0 (result) and slot 1 (operand) in ABI_PARAM1 / ABI_PARAM2, stores the
operand into slot 1 with movaps, calls the function, reloads slot 0 into
xmm0, restores rsp, and defines the instruction's value as xmm0.

The patch also drops the unary plus from CallFunction(+fn) in
EmitTwoArgumentFallback, and switches the population-count fallback (whose
lambda takes a single input operand) from the two-argument helper to the new
one-argument helper.

NOTE(review): the template parameter lists and <...> type arguments were
missing from the text this patch was recovered from and have been
reconstructed (template <typename Lambda>,
mp::equivalent_function_type_t<Lambda>, std::array<u8, 16>, static_cast<u8>).
Confirm against the upstream commit before applying.

diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index bd84b00c..77d4207a 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -31,6 +31,28 @@ static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
 
+template <typename Lambda>
+static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
+    const auto fn = static_cast<mp::equivalent_function_type_t<Lambda>*>(lambda);
+    constexpr u32 stack_space = 2 * 16;
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
+    ctx.reg_alloc.EndOfAllocScope();
+
+    ctx.reg_alloc.HostCall(nullptr);
+    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
+
+    code.movaps(xword[code.ABI_PARAM2], arg1);
+    code.CallFunction(fn);
+    code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+
+    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
+
+    ctx.reg_alloc.DefineValue(inst, xmm0);
+}
+
 template <typename Lambda>
 static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     const auto fn = static_cast<mp::equivalent_function_type_t<Lambda>*>(lambda);
@@ -48,7 +70,7 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins
 
     code.movaps(xword[code.ABI_PARAM2], arg1);
     code.movaps(xword[code.ABI_PARAM3], arg2);
-    code.CallFunction(+fn);
+    code.CallFunction(fn);
     code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
 
     code.add(rsp, stack_space + ABI_SHADOW_SPACE);
@@ -909,7 +931,7 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    EmitTwoArgumentFallback(code, ctx, inst, [](std::array<u8, 16>& result, const std::array<u8, 16>& a){
+    EmitOneArgumentFallback(code, ctx, inst, [](std::array<u8, 16>& result, const std::array<u8, 16>& a){
         for (size_t i = 0; i < 16; ++i) {
             result[i] = static_cast<u8>(Common::BitCount(a[i]));
         }