ir: Add opcodes for scalar signed saturated doubling multiplies

This commit is contained in:
Lioncash 2018-09-05 19:16:41 -04:00 committed by MerryMage
parent 9c03311fed
commit 7ebfd0f31c
4 changed files with 81 additions and 0 deletions

View file

@ -134,6 +134,66 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst); EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
} }
// Emits x64 code that sign-extends two 16-bit operands, multiplies them, doubles the product and
// defines the instruction's value as the upper 16 bits of that doubled product, substituting
// 0x7FFF when the doubling overflows a signed 32-bit value. If a GetOverflowFromOp pseudo-operation
// is attached to `inst`, it is defined from the same sign flag and then erased.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
    auto overflow_op = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    // `product` initially holds the first operand and `result` the second; both are reused below.
    const Xbyak::Reg32 product = ctx.reg_alloc.UseScratchGpr(operands[0]).cvt32();
    const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operands[1]).cvt32();
    const Xbyak::Reg32 high = ctx.reg_alloc.ScratchGpr().cvt32();

    // Sign-extend both 16-bit inputs to 32 bits so the 32-bit multiply is a signed 16x16 product.
    code.movsx(product, product.cvt16());
    code.movsx(result, result.cvt16());
    code.imul(product, result);

    // result = product + product (LEA leaves the flags alone).
    code.lea(result, ptr[product.cvt64() + product.cvt64()]);

    // high = product >> 15, whose low 16 bits are the upper 16 bits of the doubled product.
    code.mov(high, product);
    code.shr(high, 15);

    // The sign flag of (2 * product) ^ product is set exactly when bits 30 and 31 of the product
    // differ, i.e. when doubling the product overflows a signed 32-bit value.
    code.xor_(result, product);

    // MOV does not modify flags, so the sign flag from the XOR above still drives the CMOVNS:
    // keep the saturated value on overflow, otherwise take the computed high half.
    code.mov(result, 0x7FFF);
    code.cmovns(result, high);

    if (overflow_op) {
        // Still the XOR's sign flag; SETS writes only the low byte of the register.
        code.sets(high.cvt8());

        ctx.reg_alloc.DefineValue(overflow_op, high);
        ctx.EraseInstruction(overflow_op);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Emits x64 code that sign-extends two 32-bit operands, multiplies them, doubles the product and
// defines the instruction's value as the upper 32 bits of that doubled product, substituting
// 0x7FFFFFFF when the doubling overflows a signed 64-bit value. If a GetOverflowFromOp
// pseudo-operation is attached to `inst`, it is defined from the same sign flag and then erased.
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
    auto overflow_op = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    // `product` initially holds the first operand and `result` the second; both are reused below.
    const Xbyak::Reg64 product = ctx.reg_alloc.UseScratchGpr(operands[0]);
    const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operands[1]);
    const Xbyak::Reg64 high = ctx.reg_alloc.ScratchGpr();

    // Sign-extend both 32-bit inputs to 64 bits so the 64-bit multiply is a signed 32x32 product.
    code.movsxd(product, product.cvt32());
    code.movsxd(result, result.cvt32());
    code.imul(product, result);

    // result = product + product (LEA leaves the flags alone).
    code.lea(result, ptr[product + product]);

    // high = product >> 31, whose low 32 bits are the upper 32 bits of the doubled product.
    code.mov(high, product);
    code.shr(high, 31);

    // The sign flag of (2 * product) ^ product is set exactly when bits 62 and 63 of the product
    // differ, i.e. when doubling the product overflows a signed 64-bit value.
    code.xor_(result, product);

    // MOV does not modify flags, so the sign flag from the XOR above still drives the CMOVNS:
    // keep the saturated value on overflow, otherwise take the computed high half.
    code.mov(result.cvt32(), 0x7FFFFFFF);
    code.cmovns(result.cvt32(), high.cvt32());

    if (overflow_op) {
        // Still the XOR's sign flag; SETS writes only the low byte of the register.
        code.sets(high.cvt8());

        ctx.reg_alloc.DefineValue(overflow_op, high);
        ctx.EraseInstruction(overflow_op);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Forwards to the shared EmitSignedSaturatedOp emitter, instantiated with Op::Sub and a width
// argument of 8.
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
    EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
}

View file

@ -501,6 +501,24 @@ ResultAndOverflow<UAny> IREmitter::SignedSaturatedAdd(const UAny& a, const UAny&
return {result, overflow}; return {result, overflow};
} }
// Builds the IR for a signed saturated doubling multiply that returns the high half.
//
// Both operands must have the same type (asserted). U16 operands select the ...ReturnHigh16
// opcode and U32 operands the ...ReturnHigh32 opcode; any other type hits UNREACHABLE().
// Returns the result value paired with a U1 produced by a GetOverflowFromOp instruction that
// takes the result as its argument.
ResultAndOverflow<UAny> IREmitter::SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b) {
    ASSERT(a.GetType() == b.GetType());

    const IR::UAny result = [&]() -> IR::UAny {
        const auto operand_type = a.GetType();

        if (operand_type == IR::Type::U16) {
            return Inst<U16>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh16, a, b);
        }
        if (operand_type == IR::Type::U32) {
            return Inst<U32>(Opcode::SignedSaturatedDoublingMultiplyReturnHigh32, a, b);
        }

        UNREACHABLE();
        return IR::UAny{};
    }();

    const U1 overflow = Inst<U1>(Opcode::GetOverflowFromOp, result);
    return {result, overflow};
}
ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) { ResultAndOverflow<UAny> IREmitter::SignedSaturatedSub(const UAny& a, const UAny& b) {
ASSERT(a.GetType() == b.GetType()); ASSERT(a.GetType() == b.GetType());
const auto result = [&]() -> IR::UAny { const auto result = [&]() -> IR::UAny {

View file

@ -143,6 +143,7 @@ public:
U32U64 MinUnsigned(const U32U64& a, const U32U64& b); U32U64 MinUnsigned(const U32U64& a, const U32U64& b);
ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b); ResultAndOverflow<UAny> SignedSaturatedAdd(const UAny& a, const UAny& b);
// Signed saturated doubling multiply returning the high half of the doubled product.
// `a` and `b` must have the same type; the implementation handles U16 and U32 operands.
// Returns the result together with its associated overflow flag.
ResultAndOverflow<UAny> SignedSaturatedDoublingMultiplyReturnHigh(const UAny& a, const UAny& b);
ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b); ResultAndOverflow<UAny> SignedSaturatedSub(const UAny& a, const UAny& b);
ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to); ResultAndOverflow<U32> SignedSaturation(const U32& a, size_t bit_size_to_saturate_to);
ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b); ResultAndOverflow<UAny> UnsignedSaturatedAdd(const UAny& a, const UAny& b);

View file

@ -157,6 +157,8 @@ OPCODE(SignedSaturatedAdd8, U8, U8,
OPCODE(SignedSaturatedAdd16, U16, U16, U16 ) OPCODE(SignedSaturatedAdd16, U16, U16, U16 )
OPCODE(SignedSaturatedAdd32, U32, U32, U32 ) OPCODE(SignedSaturatedAdd32, U32, U32, U32 )
OPCODE(SignedSaturatedAdd64, U64, U64, U64 ) OPCODE(SignedSaturatedAdd64, U64, U64, U64 )
// Signed saturated doubling multiply (return high half), 16-bit and 32-bit variants.
// NOTE(review): the first type column looks like the result type and the remaining two the
// operand types - confirm against the OPCODE macro definition.
OPCODE(SignedSaturatedDoublingMultiplyReturnHigh16, U16, U16, U16 )
OPCODE(SignedSaturatedDoublingMultiplyReturnHigh32, U32, U32, U32 )
OPCODE(SignedSaturatedSub8, U8, U8, U8 ) OPCODE(SignedSaturatedSub8, U8, U8, U8 )
OPCODE(SignedSaturatedSub16, U16, U16, U16 ) OPCODE(SignedSaturatedSub16, U16, U16, U16 )
OPCODE(SignedSaturatedSub32, U32, U32, U32 ) OPCODE(SignedSaturatedSub32, U32, U32, U32 )