emit_x64_saturation: Improve codegen for saturated result in EmitSignedSaturation
This commit is contained in:
parent
e953f67201
commit
3ccc415c52
1 changed file with 3 additions and 6 deletions
|
@@ -219,21 +219,18 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const u32 mask = (1u << N) - 1;
|
const u32 mask = (1u << N) - 1;
|
||||||
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
|
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
|
||||||
const u32 negative_saturated_value = 1u << (N - 1);
|
const u32 negative_saturated_value = 1u << (N - 1);
|
||||||
const u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value);
|
|
||||||
|
|
||||||
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
||||||
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
|
|
||||||
|
|
||||||
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
|
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
|
||||||
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
|
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
|
||||||
|
|
||||||
// Put the appropriate saturated value in result
|
// Put the appropriate saturated value in result
|
||||||
code.cmp(reg_a, positive_saturated_value);
|
code.mov(result, reg_a);
|
||||||
code.mov(tmp, positive_saturated_value);
|
code.sar(result, 31);
|
||||||
code.mov(result, sext_negative_satured_value);
|
code.xor_(result, positive_saturated_value);
|
||||||
code.cmovg(result, tmp);
|
|
||||||
|
|
||||||
// Do the saturation
|
// Do the saturation
|
||||||
code.cmp(overflow, mask);
|
code.cmp(overflow, mask);
|
||||||
|
|
Loading…
Reference in a new issue