emit_x64_saturation: Improve codegen for saturated result in EmitSignedSaturation

2020-06-12 15:24:37 +01:00 · 2020-06-12 15:24:37 +01:00 · 3ccc415c52
commit 3ccc415c52
parent e953f67201
1 changed file with 3 additions and 6 deletions
--- a/src/backend/x64/emit_x64_saturation.cpp
+++ b/src/backend/x64/emit_x64_saturation.cpp
@@ -219,21 +219,18 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
    const u32 mask = (1u << N) - 1;
    const u32 positive_saturated_value = (1u << (N - 1)) - 1;
    const u32 negative_saturated_value = 1u << (N - 1);
-    const u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value);

    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
    const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
-    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

    // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
    code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);

    // Put the appropriate saturated value in result
-    code.cmp(reg_a, positive_saturated_value);
-    code.mov(tmp, positive_saturated_value);
-    code.mov(result, sext_negative_satured_value);
-    code.cmovg(result, tmp);
+    code.mov(result, reg_a);
+    code.sar(result, 31);
+    code.xor_(result, positive_saturated_value);

    // Do the saturation
    code.cmp(overflow, mask);