emit_x64_saturation: Improve codegen for saturated result in EmitSignedSaturation

This commit is contained in:
MerryMage 2020-06-12 15:24:37 +01:00
parent e953f67201
commit 3ccc415c52

View file

@ -219,21 +219,18 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const u32 mask = (1u << N) - 1;
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = 1u << (N - 1);
const u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value);
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
// Bias reg_a by negative_saturated_value: overflow then lies between 0 and mask (inclusive) if reg_a was within the signed N-bit range, i.e. between -negative_saturated_value and positive_saturated_value.
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
// Put the appropriate saturated value in result
code.cmp(reg_a, positive_saturated_value);
code.mov(tmp, positive_saturated_value);
code.mov(result, sext_negative_satured_value);
code.cmovg(result, tmp);
code.mov(result, reg_a);
code.sar(result, 31);
code.xor_(result, positive_saturated_value);
// Do the saturation
code.cmp(overflow, mask);