ir: Add opcodes for scalar signed saturated doubling multiplies
This commit is contained in:
parent
9c03311fed
commit
7ebfd0f31c
4 changed files with 81 additions and 0 deletions
|
@ -134,6 +134,66 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits host code for the SignedSaturatedDoublingMultiplyReturnHigh16 IR instruction.
//
// Both operands are sign-extended from 16 bits and multiplied as 32-bit integers.
// The low 16 bits of the defined result hold bits [31:16] of the doubled product,
// unless doubling overflows the signed 32-bit range, in which case the result is
// 0x7FFF. When the instruction has an associated GetOverflowFromOp
// pseudo-operation, that pseudo-operation is defined from the overflow condition
// and then erased.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // `product` initially holds the first operand and `result` the second; they
    // are named after what they contain once the arithmetic below has run.
    const Xbyak::Reg32 product = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
    const Xbyak::Reg32 high = ctx.reg_alloc.ScratchGpr().cvt32();
    const auto product64 = product.cvt64();

    // Sign-extend the 16-bit operands to 32 bits.
    code.movsx(product, product.cvt16());
    code.movsx(result, result.cvt16());

    // product = a * b; result = 2 * product.
    code.imul(product, result);
    code.lea(result, ptr[product64 + product64]);

    // high = product >> 15 (logical shift): its low 16 bits equal bits [31:16]
    // of the doubled product.
    code.mov(high, product);
    code.shr(high, 15);

    // The sign flag is now set iff bit 31 of (2 * product) differs from bit 31
    // of product, i.e. iff the doubling overflowed.
    code.xor_(result, product);

    // MOV and CMOVcc do not modify flags, so the XOR's sign flag still selects
    // between the saturated constant and the computed high half.
    code.mov(result, 0x7FFF);
    code.cmovns(result, high);

    if (overflow_inst) {
        // The flags are still those of the XOR; only the low byte of `high` is written.
        code.sets(high.cvt8());

        ctx.reg_alloc.DefineValue(overflow_inst, high);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
|
// Emits host code for the SignedSaturatedDoublingMultiplyReturnHigh32 IR instruction.
//
// Both operands are sign-extended from 32 bits and multiplied as 64-bit integers.
// The low 32 bits of the defined result hold bits [63:32] of the doubled product,
// unless doubling overflows the signed 64-bit range, in which case the low 32
// bits are 0x7FFFFFFF. When the instruction has an associated GetOverflowFromOp
// pseudo-operation, that pseudo-operation is defined from the overflow condition
// and then erased.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // `product` initially holds the first operand and `result` the second; they
    // are named after what they contain once the arithmetic below has run.
    const Xbyak::Reg64 product = ctx.reg_alloc.UseScratchGpr(args[0]);
    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[1]);
    const Xbyak::Reg64 high = ctx.reg_alloc.ScratchGpr();
    const auto result32 = result.cvt32();

    // Sign-extend the 32-bit operands to 64 bits.
    code.movsxd(product, product.cvt32());
    code.movsxd(result, result32);

    // product = a * b; result = 2 * product.
    code.imul(product, result);
    code.lea(result, ptr[product + product]);

    // high = product >> 31 (logical shift): its low 32 bits equal bits [63:32]
    // of the doubled product.
    code.mov(high, product);
    code.shr(high, 31);

    // The sign flag is now set iff bit 63 of (2 * product) differs from bit 63
    // of product, i.e. iff the doubling overflowed.
    code.xor_(result, product);

    // MOV and CMOVcc do not modify flags, so the XOR's sign flag still selects
    // between the saturated constant and the computed high half.
    code.mov(result32, 0x7FFFFFFF);
    code.cmovns(result32, high.cvt32());

    if (overflow_inst) {
        // The flags are still those of the XOR; only the low byte of `high` is written.
        code.sets(high.cvt8());

        ctx.reg_alloc.DefineValue(overflow_inst, high);
        ctx.EraseInstruction(overflow_inst);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||||
}
|
}
|
||||||
|
|
|
@ -501,6 +501,24 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny&
|
||||||
return {result, overflow};
|
return {result, overflow};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a signed saturated doubling multiply (returning the high half) on two
// operands of the same type, together with its overflow pseudo-operation.
//
// U16 and U32 operands select the 16-bit and 32-bit opcodes respectively; any
// other operand type is treated as unreachable. The returned pair holds the
// result value and a U1 produced by GetOverflowFromOp on that result.
ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());

    const auto result = [&]() -> IR::UAny {
        const auto operand_type = a.GetType();

        if (operand_type == IR::Type::U16) {
            return Inst<U16>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh16, a, b);
        }
        if (operand_type == IR::Type::U32) {
            return Inst<U32>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh32, a, b);
        }

        // Only 16-bit and 32-bit operands have a corresponding opcode.
        UNREACHABLE();
        return IR::UAny{};
    }();

    const auto overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
    return {result, overflow};
}
|
||||||
|
|
||||||
ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
|
ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
|
||||||
ASSERT(a.GetType() == b.GetType());
|
ASSERT(a.GetType() == b.GetType());
|
||||||
const auto result = [&]() -> IR::UAny {
|
const auto result = [&]() -> IR::UAny {
|
||||||
|
|
|
@ -143,6 +143,7 @@ public:
|
||||||
U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
|
U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
|
||||||
|
|
||||||
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
|
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
|
||||||
|
// Signed saturated doubling multiply returning the high half of the product.
// `a` and `b` must have the same type (asserted by the implementation); only
// U16 and U32 operands are handled. Returns the result together with its
// overflow flag.
ResultAndOverflow<UAny> SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b);
|
||||||
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
|
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
|
||||||
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
|
||||||
ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
|
ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);
|
||||||
|
|
|
@ -157,6 +157,8 @@ OPCODE(SignedSaturatedAdd8, U8, U8,
|
||||||
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
|
OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
|
||||||
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
|
OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
|
||||||
OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
|
OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
|
||||||
|
// 16-bit signed saturated doubling multiply, high half. Emitted by
// IREmitter::SignedSaturatedDoublingMultiplyReturnHigh for U16 operands.
// NOTE(review): type list is presumably result type followed by the two operand types - confirm against the OPCODE macro.
OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 )
|
||||||
|
// 32-bit signed saturated doubling multiply, high half. Emitted by
// IREmitter::SignedSaturatedDoublingMultiplyReturnHigh for U32 operands.
// NOTE(review): type list is presumably result type followed by the two operand types - confirm against the OPCODE macro.
OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 )
|
||||||
OPCODE(SignedSaturatedSub8, U8, U8, U8 )
|
OPCODE(SignedSaturatedSub8, U8, U8, U8 )
|
||||||
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
|
OPCODE(SignedSaturatedSub16, U16, U16, U16 )
|
||||||
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
|
OPCODE(SignedSaturatedSub32, U32, U32, U32 )
|
||||||
|
|
Loading…
Reference in a new issue