emit_x64_floating_point: Deduplicate EmitFPMulAdd implementation
This commit is contained in:
parent
e038fe72df
commit
2019d32743
1 changed files with 19 additions and 28 deletions
|
@ -14,6 +14,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/fp/fpcr.h"
|
#include "common/fp/fpcr.h"
|
||||||
#include "common/fp/fpsr.h"
|
#include "common/fp/fpsr.h"
|
||||||
|
#include "common/fp/info.h"
|
||||||
#include "common/fp/op.h"
|
#include "common/fp/op.h"
|
||||||
#include "common/fp/rounding_mode.h"
|
#include "common/fp/rounding_mode.h"
|
||||||
#include "common/fp/util.h"
|
#include "common/fp/util.h"
|
||||||
|
@ -648,8 +649,22 @@ void EmitX64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
FPThreeOp<64>(code, ctx, inst, &Xbyak::CodeGenerator::mulsd);
|
FPThreeOp<64>(code, ctx, inst, &Xbyak::CodeGenerator::mulsd);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename FPT>
|
template<size_t fsize>
|
||||||
static void EmitFPMulAddFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||||
|
|
||||||
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tFMA)) {
|
||||||
|
FPFourOp<fsize>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand2, Xbyak::Xmm operand3) {
|
||||||
|
FCODE(vfmadd231s)(result, operand2, operand3);
|
||||||
|
}, [](FPT a, FPT b, FPT c) -> FPT {
|
||||||
|
if (FP::IsQNaN(a) && ((FP::IsInf(b) && FP::IsZero(c)) || (FP::IsZero(b) && FP::IsInf(c)))) {
|
||||||
|
return FP::FPInfo<FPT>::DefaultNaN();
|
||||||
|
}
|
||||||
|
return *FP::ProcessNaNs(a, b, c);
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
|
ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
|
||||||
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
|
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
|
||||||
|
@ -666,35 +681,11 @@ static void EmitFPMulAddFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMulAdd32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMulAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tFMA)) {
|
EmitFPMulAdd<32>(code, ctx, inst);
|
||||||
FPFourOp<32>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand2, Xbyak::Xmm operand3) {
|
|
||||||
code.vfmadd231ss(result, operand2, operand3);
|
|
||||||
}, [](u32 a, u32 b, u32 c) -> u32 {
|
|
||||||
if (FP::IsQNaN(a) && ((FP::IsInf(b) && FP::IsZero(c)) || (FP::IsZero(b) && FP::IsInf(c)))) {
|
|
||||||
return f32_nan;
|
|
||||||
}
|
|
||||||
return *FP::ProcessNaNs(a, b, c);
|
|
||||||
});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
EmitFPMulAddFallback<u32>(code, ctx, inst);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tFMA)) {
|
EmitFPMulAdd<64>(code, ctx, inst);
|
||||||
FPFourOp<64>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand2, Xbyak::Xmm operand3) {
|
|
||||||
code.vfmadd231sd(result, operand2, operand3);
|
|
||||||
}, [](u64 a, u64 b, u64 c) -> u64 {
|
|
||||||
if (FP::IsQNaN(a) && ((FP::IsInf(b) && FP::IsZero(c)) || (FP::IsZero(b) && FP::IsInf(c)))) {
|
|
||||||
return f64_nan;
|
|
||||||
}
|
|
||||||
return *FP::ProcessNaNs(a, b, c);
|
|
||||||
});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
EmitFPMulAddFallback<u64>(code, ctx, inst);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename FPT>
|
template<typename FPT>
|
||||||
|
|
Loading…
Reference in a new issue