A64: Implement FMAXNM (scalar)

This commit is contained in:
MerryMage 2018-02-20 14:05:14 +00:00
parent 1dfce0894d
commit 1c9804ea07
6 changed files with 123 additions and 13 deletions

View file

@ -105,8 +105,8 @@ static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_
code.pand(xmm_value, xmm_scratch);
}
static Xbyak::Label PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
Xbyak::Label nan, end;
static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) {
Xbyak::Label nan;
code.ucomiss(a, b);
code.jp(nan, code.T_NEAR);
@ -128,7 +128,6 @@ static Xbyak::Label PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
return end;
}
static void PostProcessNaNs32(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
@ -146,8 +145,8 @@ static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
code.L(end);
}
static Xbyak::Label PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
Xbyak::Label nan, end;
static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) {
Xbyak::Label nan;
code.ucomisd(a, b);
code.jp(nan, code.T_NEAR);
@ -167,7 +166,6 @@ static Xbyak::Label PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
return end;
}
static void PostProcessNaNs64(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
@ -215,8 +213,8 @@ static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
return end;
}
template <typename Function>
static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
template <typename PreprocessFunction, typename Function>
static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, PreprocessFunction preprocess, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Label end;
@ -225,12 +223,15 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Fun
Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
if constexpr(!std::is_same_v<PreprocessFunction, std::nullptr_t>) {
preprocess(result, operand, gpr_scratch, end);
}
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero32(code, result, gpr_scratch);
DenormalsAreZero32(code, operand, gpr_scratch);
}
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
end = PreProcessNaNs32(code, result, operand);
PreProcessNaNs32(code, result, operand, end);
}
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.*fn)(result, operand);
@ -250,8 +251,8 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Fun
ctx.reg_alloc.DefineValue(inst, result);
}
template <typename Function>
static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
template <typename PreprocessFunction, typename Function>
static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, PreprocessFunction preprocess, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Label end;
@ -260,12 +261,15 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Fun
Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
if constexpr(!std::is_same_v<PreprocessFunction, std::nullptr_t>) {
preprocess(result, operand, gpr_scratch, end);
}
if (ctx.FPSCR_FTZ()) {
DenormalsAreZero64(code, result, gpr_scratch);
DenormalsAreZero64(code, operand, gpr_scratch);
}
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
end = PreProcessNaNs64(code, result, operand);
PreProcessNaNs64(code, result, operand, end);
}
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.*fn)(result, operand);
@ -285,6 +289,16 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Fun
ctx.reg_alloc.DefineValue(inst, result);
}
// Convenience overload of FPThreeOp32 for operations without a custom preprocessing step.
template <typename Function>
static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    // A std::nullptr_t preprocess argument makes the five-argument overload
    // compile out its call to the preprocessing function.
    constexpr std::nullptr_t no_preprocess = nullptr;
    FPThreeOp32(code, ctx, inst, no_preprocess, fn);
}
// Convenience overload of FPThreeOp64 for operations without a custom preprocessing step.
template <typename Function>
static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    // A std::nullptr_t preprocess argument makes the five-argument overload
    // compile out its call to the preprocessing function.
    constexpr std::nullptr_t no_preprocess = nullptr;
    FPThreeOp64(code, ctx, inst, no_preprocess, fn);
}
template <typename Function>
static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -443,6 +457,74 @@ void EmitX64::EmitFPMax64(EmitContext& ctx, IR::Inst* inst) {
});
}
/// Emits x64 code for the FPMaxNumeric32 IR instruction.
///
/// The lambda is handed to FPThreeOp32 as its preprocessing step. It inspects the
/// two single-precision inputs and, when one of them is a quiet NaN, arranges for
/// the other input to be selected. Every other case continues at `normal`, after
/// which FPThreeOp32 emits its usual handling followed by `maxss`.
void EmitX64::EmitFPMaxNumeric32(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp32(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand, Xbyak::Reg32 scratch, Xbyak::Label& end){
        Xbyak::Label normal, normal_or_equal, result_is_result;

        // ucomiss reports an unordered comparison (a NaN is involved) through PF.
        code.ucomiss(result, operand);
        code.jnp(normal_or_equal);

        // If operand == QNaN, result = result.
        // Shifting left by one discards the sign bit, so the NaN classes can be
        // separated with unsigned compares: 0xff800000 is (0x7fc00000 << 1), i.e.
        // exponent all ones with the top mantissa bit set.
        code.movd(scratch, operand);
        code.shl(scratch, 1);
        code.cmp(scratch, 0xff800000u);
        code.jae(result_is_result);

        // If operand == SNaN, let usual NaN code handle it.
        // 0xff000000 is (0x7f800000 << 1), the shifted encoding of infinity;
        // anything above it (and below the quiet threshold tested above) is an SNaN.
        code.cmp(scratch, 0xff000000u);
        code.ja(normal);

        // If result == SNaN, && operand != NaN, result = result.
        // operand is not a NaN here, so the unordered compare means result is one.
        code.movd(scratch, result);
        code.shl(scratch, 1);
        code.cmp(scratch, 0xff800000u);
        code.jnae(result_is_result);

        // If result == QNaN && operand != NaN, result = operand.
        code.movaps(result, operand);
        // `end` is owned by the caller and is bound after the remainder of the
        // emitted sequence. Request a near jump, as the other jumps to `end` in
        // this file do, instead of relying on the target being within short range.
        code.jmp(end, code.T_NEAR);

        // Make both inputs identical so the following code operates on result alone.
        code.L(result_is_result);
        code.movaps(operand, result);
        code.jmp(normal);

        // Ordered comparison: the flags from ucomiss are still live here.
        code.L(normal_or_equal);
        code.jnz(normal);
        // Inputs compare equal: AND the bit patterns. For identical bit patterns
        // this is a no-op; for +0 versus -0 it leaves +0 in operand.
        code.andps(operand, result);

        code.L(normal);
    }, &Xbyak::CodeGenerator::maxss);
}
/// Emits x64 code for the FPMaxNumeric64 IR instruction.
///
/// The lambda is handed to FPThreeOp64 as its preprocessing step. It inspects the
/// two double-precision inputs and, when one of them is a quiet NaN, arranges for
/// the other input to be selected. Every other case continues at `normal`, after
/// which FPThreeOp64 emits its usual handling followed by `maxsd`.
void EmitX64::EmitFPMaxNumeric64(EmitContext& ctx, IR::Inst* inst) {
    FPThreeOp64(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand, Xbyak::Reg64 scratch, Xbyak::Label& end){
        Xbyak::Label normal, normal_or_equal, result_is_result;

        // ucomisd reports an unordered comparison (a NaN is involved) through PF.
        code.ucomisd(result, operand);
        code.jnp(normal_or_equal);

        // If operand == QNaN, result = result.
        // Shifting left by one discards the sign bit, so the NaN classes can be
        // separated with unsigned compares: 0xfff0'0000'0000'0000 is
        // (0x7ff8'0000'0000'0000 << 1), i.e. exponent all ones with the top
        // mantissa bit set. The 64-bit thresholds are compared from memory constants.
        code.movq(scratch, operand);
        code.shl(scratch, 1);
        code.cmp(scratch, code.MConst(qword, 0xfff0'0000'0000'0000u));
        code.jae(result_is_result);

        // If operand == SNaN, let usual NaN code handle it.
        // 0xffe0'0000'0000'0000 is (0x7ff0'0000'0000'0000 << 1), the shifted
        // encoding of infinity; anything above it (and below the quiet threshold
        // tested above) is an SNaN.
        code.cmp(scratch, code.MConst(qword, 0xffe0'0000'0000'0000u));
        code.ja(normal);

        // If result == SNaN, && operand != NaN, result = result.
        // operand is not a NaN here, so the unordered compare means result is one.
        code.movq(scratch, result);
        code.shl(scratch, 1);
        code.cmp(scratch, code.MConst(qword, 0xfff0'0000'0000'0000u));
        code.jnae(result_is_result);

        // If result == QNaN && operand != NaN, result = operand.
        code.movaps(result, operand);
        // `end` is owned by the caller and is bound after the remainder of the
        // emitted sequence. Request a near jump, as the other jumps to `end` in
        // this file do, instead of relying on the target being within short range.
        code.jmp(end, code.T_NEAR);

        // Make both inputs identical so the following code operates on result alone.
        code.L(result_is_result);
        code.movaps(operand, result);
        code.jmp(normal);

        // Ordered comparison: the flags from ucomisd are still live here.
        code.L(normal_or_equal);
        code.jnz(normal);
        // Inputs compare equal: AND the bit patterns. For identical bit patterns
        // this is a no-op; for +0 versus -0 it leaves +0 in operand.
        code.andps(operand, result);

        code.L(normal);
    }, &Xbyak::CodeGenerator::maxsd);
}
void EmitX64::EmitFPMin32(EmitContext& ctx, IR::Inst* inst) {
FPThreeOp32(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
Xbyak::Label normal, end;

View file

@ -927,7 +927,7 @@ INST(FADD_float, "FADD (scalar)", "00011
INST(FSUB_float, "FSUB (scalar)", "00011110yy1mmmmm001110nnnnnddddd")
INST(FMAX_float, "FMAX (scalar)", "00011110yy1mmmmm010010nnnnnddddd")
INST(FMIN_float, "FMIN (scalar)", "00011110yy1mmmmm010110nnnnnddddd")
//INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd")
INST(FMAXNM_float, "FMAXNM (scalar)", "00011110yy1mmmmm011010nnnnnddddd")
//INST(FMINNM_float, "FMINNM (scalar)", "00011110yy1mmmmm011110nnnnnddddd")
INST(FNMUL_float, "FNMUL (scalar)", "00011110yy1mmmmm100010nnnnnddddd")

View file

@ -113,6 +113,21 @@ bool TranslatorVisitor::FMIN_float(Imm<2> type, Vec Vm, Vec Vn, Vec Vd) {
return true;
}
// Translates FMAXNM (scalar): Vd = FPMaxNumeric(Vn, Vm) at the width selected by `type`.
// Returns the result of UnallocatedEncoding() when `type` maps to no data size.
bool TranslatorVisitor::FMAXNM_float(Imm<2> type, Vec Vm, Vec Vn, Vec Vd) {
    auto size = GetDataSize(type);
    if (size) {
        // Read Vn before Vm so the IR is emitted in the same order as the operands are named.
        const IR::U32U64 lhs = V_scalar(*size, Vn);
        const IR::U32U64 rhs = V_scalar(*size, Vm);

        const IR::U32U64 maximum = ir.FPMaxNumeric(lhs, rhs, true);
        V_scalar(*size, Vd, maximum);
        return true;
    }

    return UnallocatedEncoding();
}
bool TranslatorVisitor::FNMUL_float(Imm<2> type, Vec Vm, Vec Vn, Vec Vd) {
auto datasize = GetDataSize(type);
if (!datasize) {

View file

@ -1185,6 +1185,16 @@ U32U64 IREmitter::FPMax(const U32U64& a, const U32U64& b, bool fpscr_controlled)
}
}
// Emits an FPMaxNumeric32 or FPMaxNumeric64 IR instruction, chosen by the operand type.
// Both operands must share a type, and `fpscr_controlled` must be true.
U32U64 IREmitter::FPMaxNumeric(const U32U64& a, const U32U64& b, bool fpscr_controlled) {
    ASSERT(fpscr_controlled);
    ASSERT(a.GetType() == b.GetType());

    // Anything that is not U32 takes the 64-bit opcode.
    if (a.GetType() != Type::U32) {
        return Inst<U64>(Opcode::FPMaxNumeric64, a, b);
    }
    return Inst<U32>(Opcode::FPMaxNumeric32, a, b);
}
U32U64 IREmitter::FPMin(const U32U64& a, const U32U64& b, bool fpscr_controlled) {
ASSERT(fpscr_controlled);
ASSERT(a.GetType() == b.GetType());

View file

@ -247,6 +247,7 @@ public:
NZCV FPCompare(const U32U64& a, const U32U64& b, bool exc_on_qnan, bool fpscr_controlled);
U32U64 FPDiv(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32U64 FPMax(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32U64 FPMaxNumeric(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32U64 FPMin(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
U32U64 FPNeg(const U32U64& a);

View file

@ -300,6 +300,8 @@ OPCODE(FPDiv32, T::U32, T::U32, T::U32
OPCODE(FPDiv64, T::U64, T::U64, T::U64 )
OPCODE(FPMax32, T::U32, T::U32, T::U32 )
OPCODE(FPMax64, T::U64, T::U64, T::U64 )
OPCODE(FPMaxNumeric32, T::U32, T::U32, T::U32 )
OPCODE(FPMaxNumeric64, T::U64, T::U64, T::U64 )
OPCODE(FPMin32, T::U32, T::U32, T::U32 )
OPCODE(FPMin64, T::U64, T::U64, T::U64 )
OPCODE(FPMul32, T::U32, T::U32, T::U32 )