emit_x64{,_vector}_floating_point: Fix non-FMA execution
Avoid repeated calls to GetArgumentInfo
This commit is contained in:
parent
6023bcd8ad
commit
3806284cbe
2 changed files with 33 additions and 41 deletions
|
@ -283,7 +283,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
if (ctx.FPCR().DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
@ -293,22 +293,10 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
|
||||||
fn(result, operand);
|
fn(result, operand);
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
return;
|
ForceToDefaultNaN<fsize>(code, result);
|
||||||
}
|
|
||||||
|
|
||||||
if (ctx.FPCR().DN()) {
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
|
||||||
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
|
||||||
|
|
||||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
|
||||||
(code.*fn)(result, operand);
|
|
||||||
} else {
|
|
||||||
fn(result, operand);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ForceToDefaultNaN<fsize>(code, result);
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -605,9 +593,9 @@ template<size_t fsize>
|
||||||
static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
using FPT = mp::unsigned_integer_of_size<fsize>;
|
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
|
if constexpr (fsize != 16) {
|
||||||
if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
@ -680,7 +668,6 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
|
ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
|
||||||
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
|
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
@ -834,10 +821,10 @@ template<size_t fsize>
|
||||||
static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
using FPT = mp::unsigned_integer_of_size<fsize>;
|
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||||
|
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
if constexpr (fsize != 16) {
|
||||||
if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
Xbyak::Label end, fallback;
|
Xbyak::Label end, fallback;
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
@ -852,8 +839,6 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasFMA()) {
|
if (code.HasFMA()) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
Xbyak::Label end, fallback;
|
Xbyak::Label end, fallback;
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
@ -888,8 +873,6 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
@ -903,7 +886,6 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
|
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
|
||||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||||
|
@ -1038,10 +1020,10 @@ template<size_t fsize>
|
||||||
static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
using FPT = mp::unsigned_integer_of_size<fsize>;
|
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||||
|
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
if constexpr (fsize != 16) {
|
||||||
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
@ -1055,8 +1037,6 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX()) {
|
if (code.HasFMA() && code.HasAVX()) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
Xbyak::Label end, fallback;
|
Xbyak::Label end, fallback;
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
@ -1103,8 +1083,6 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
@ -1119,7 +1097,6 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
|
ctx.reg_alloc.HostCall(inst, args[0], args[1]);
|
||||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||||
|
|
|
@ -989,10 +989,10 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
};
|
};
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
if constexpr (fsize != 16) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||||
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
|
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
|
||||||
|
@ -1006,6 +1006,9 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX()) {
|
if (code.HasFMA() && code.HasAVX()) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||||
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
@ -1041,6 +1044,8 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
|
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
|
||||||
|
@ -1247,10 +1252,10 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
};
|
};
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
if constexpr (fsize != 16) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
const bool fpcr_controlled = args[2].GetImmediateU1();
|
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||||
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
@ -1265,6 +1270,9 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX()) {
|
if (code.HasFMA() && code.HasAVX()) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||||
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
@ -1297,6 +1305,8 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
@ -1454,10 +1464,10 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
};
|
};
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
if constexpr (fsize != 16) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
|
||||||
const bool fpcr_controlled = args[2].GetImmediateU1();
|
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||||
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
@ -1473,6 +1483,9 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX()) {
|
if (code.HasFMA() && code.HasAVX()) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||||
|
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
@ -1511,6 +1524,8 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
Loading…
Reference in a new issue