emit_x64_floating_point: Correct FP{Max,Min}{32,64} implementations for -0/+0

2018-02-18 15:08:32 +00:00 · 2018-02-18 15:08:32 +00:00 · 6541ec064d
commit 6541ec064d
parent 2080a51f41
1 changed files with 84 additions and 12 deletions
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@ -4,6 +4,8 @@
 * General Public License version 2 or any later version.
 */
 #include <type_traits>
 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
@ -213,7 +215,8 @@ static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
    return end;
 }
-static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+template <typename Function>
 static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Label end;
@ -229,7 +232,11 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        end = PreProcessNaNs32(code, result, operand);
    }
    if constexpr (std::is_member_function_pointer_v<Function>) {
        (code.*fn)(result, operand);
    } else {
        fn(result, operand);
    }
    if (ctx.FPSCR_FTZ()) {
        FlushToZero32(code, result, gpr_scratch);
    }
@ -243,7 +250,8 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
    ctx.reg_alloc.DefineValue(inst, result);
 }
-static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+template <typename Function>
 static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Label end;
@ -259,7 +267,11 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        end = PreProcessNaNs64(code, result, operand);
    }
    if constexpr (std::is_member_function_pointer_v<Function>) {
        (code.*fn)(result, operand);
    } else {
        fn(result, operand);
    }
    if (ctx.FPSCR_FTZ()) {
        FlushToZero64(code, result, gpr_scratch);
    }
@ -273,7 +285,8 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
    ctx.reg_alloc.DefineValue(inst, result);
 }
-static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+template <typename Function>
 static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Label end;
@ -287,7 +300,11 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        end = ProcessNaN32(code, result);
    }
    if constexpr (std::is_member_function_pointer_v<Function>) {
        (code.*fn)(result, result);
    } else {
        fn(result);
    }
    if (ctx.FPSCR_FTZ()) {
        FlushToZero32(code, result, gpr_scratch);
    }
@ -301,7 +318,8 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
    ctx.reg_alloc.DefineValue(inst, result);
 }
-static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+template <typename Function>
 static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Label end;
@ -315,7 +333,11 @@ static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        end = ProcessNaN64(code, result);
    }
    if constexpr (std::is_member_function_pointer_v<Function>) {
        (code.*fn)(result, result);
    } else {
        fn(result);
    }
    if (ctx.FPSCR_FTZ()) {
        FlushToZero64(code, result, gpr_scratch);
    }
@ -382,19 +404,69 @@ void EmitX64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
 }
 void EmitX64::EmitFPMax32(EmitContext& ctx, IR::Inst* inst) {
-    FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::maxss);
+    FPThreeOp32(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
        Xbyak::Label normal, end;
        code.ucomiss(result, operand);
        code.jnz(normal);
        if (!ctx.AccurateNaN()) {
            Xbyak::Label notnan;
            code.jnp(notnan);
            code.addss(result, operand);
            code.jmp(end);
            code.L(notnan);
        }
        code.andps(result, operand);
        code.jmp(end);
        code.L(normal);
        code.maxss(result, operand);
        code.L(end);
    });
 }
 void EmitX64::EmitFPMax64(EmitContext& ctx, IR::Inst* inst) {
-    FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::maxsd);
+    FPThreeOp64(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
        Xbyak::Label normal, end;
        code.ucomisd(result, operand);
        code.jnz(normal);
        if (!ctx.AccurateNaN()) {
            Xbyak::Label notnan;
            code.jnp(notnan);
            code.addsd(result, operand);
            code.jmp(end);
            code.L(notnan);
        }
        code.andps(result, operand);
        code.jmp(end);
        code.L(normal);
        code.maxsd(result, operand);
        code.L(end);
    });
 }
 void EmitX64::EmitFPMin32(EmitContext& ctx, IR::Inst* inst) {
-    FPThreeOp32(code, ctx, inst, &Xbyak::CodeGenerator::minss);
+    FPThreeOp32(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
        Xbyak::Label normal, end;
        code.ucomiss(result, operand);
        code.jnz(normal);
        code.orps(result, operand);
        code.jmp(end);
        code.L(normal);
        code.minss(result, operand);
        code.L(end);
    });
 }
 void EmitX64::EmitFPMin64(EmitContext& ctx, IR::Inst* inst) {
-    FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::minsd);
+    FPThreeOp64(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm operand){
        Xbyak::Label normal, end;
        code.ucomisd(result, operand);
        code.jnz(normal);
        code.orps(result, operand);
        code.jmp(end);
        code.L(normal);
        code.minsd(result, operand);
        code.L(end);
    });
 }
 void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {