opcodes: Add 64-bit CountLeadingZeros opcode

This commit is contained in:
Lioncash 2018-01-22 10:51:40 -05:00 committed by MerryMage
parent 4c4efb2213
commit b612782445
4 changed files with 43 additions and 3 deletions

View file

@ -1302,7 +1302,7 @@ void EmitX64<JST>::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) {
} }
template <typename JST> template <typename JST>
void EmitX64<JST>::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) { void EmitX64<JST>::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32(); Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32();
@ -1326,6 +1326,31 @@ void EmitX64<JST>::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) {
} }
} }
// Emits host code that counts the leading zero bits of a 64-bit operand and
// defines the result register as the value of `inst`.
//
// When the host reports LZCNT support a single lzcnt is emitted. Otherwise the
// count is derived from BSR: for a non-zero input BSR yields the index of the
// highest set bit, so the count is 63 - index; for a zero input the index is
// replaced by -1 so the same arithmetic produces 64.
template <typename JST>
void EmitX64<JST>::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    if (!code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
        // Fallback path. The input register is requested as scratch because it
        // is overwritten below with the substitute value for the zero case.
        const Xbyak::Reg64 input = ctx.reg_alloc.UseScratchGpr(operands[0]).cvt64();
        const Xbyak::Reg64 count = ctx.reg_alloc.ScratchGpr().cvt64();

        // BSR leaves its destination undefined for a zero source but sets ZF,
        // which the cmovz below consumes (the intervening mov leaves flags alone).
        code->bsr(count, input);
        code->mov(input.cvt32(), 0xFFFFFFFF);
        code->cmovz(count.cvt32(), input.cvt32());

        // count = 63 - count, computed as (-count) + 63 on the 32-bit view.
        code->neg(count.cvt32());
        code->add(count.cvt32(), 63);

        ctx.reg_alloc.DefineValue(inst, count);
        return;
    }

    // LZCNT is available: one instruction gives the answer directly.
    const Xbyak::Reg64 input = ctx.reg_alloc.UseGpr(operands[0]).cvt64();
    const Xbyak::Reg64 count = ctx.reg_alloc.ScratchGpr().cvt64();
    code->lzcnt(count, input);
    ctx.reg_alloc.DefineValue(inst, count);
}
template <typename JST> template <typename JST>
void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) { void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

View file

@ -427,7 +427,19 @@ U64 IREmitter::ByteReverseDual(const U64& a) {
} }
U32 IREmitter::CountLeadingZeros(const U32& a) { U32 IREmitter::CountLeadingZeros(const U32& a) {
return Inst<U32>(Opcode::CountLeadingZeros, a); return Inst<U32>(Opcode::CountLeadingZeros32, a);
}
// 64-bit overload: builds a CountLeadingZeros64 instruction over `a` through
// Inst<U64> and returns the value it produces.
U64 IREmitter::CountLeadingZeros(const U64& a) {
    const auto opcode = Opcode::CountLeadingZeros64;
    return Inst<U64>(opcode, a);
}
// Width-generic overload: picks the opcode from the runtime type of `a`.
// A U32 operand is emitted as CountLeadingZeros32 via Inst<U32>; any other
// type falls through to CountLeadingZeros64 via Inst<U64>.
// NOTE(review): presumably a U32U64 only ever holds U32 or U64 - confirm.
U32U64 IREmitter::CountLeadingZeros(const U32U64& a) {
if (a.GetType() == IR::Type::U32) {
return Inst<U32>(Opcode::CountLeadingZeros32, a);
}
// Not U32: emitted as the 64-bit opcode without a further type check.
return Inst<U64>(Opcode::CountLeadingZeros64, a);
} }
ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) { ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {

View file

@ -139,6 +139,8 @@ public:
U16 ByteReverseHalf(const U16& a); U16 ByteReverseHalf(const U16& a);
U64 ByteReverseDual(const U64& a); U64 ByteReverseDual(const U64& a);
U32 CountLeadingZeros(const U32& a); U32 CountLeadingZeros(const U32& a);
U64 CountLeadingZeros(const U64& a);
U32U64 CountLeadingZeros(const U32U64& a);
ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b); ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b); ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);

View file

@ -115,7 +115,8 @@ OPCODE(ZeroExtendWordToLong, T::U64, T::U32
OPCODE(ByteReverseWord, T::U32, T::U32 ) OPCODE(ByteReverseWord, T::U32, T::U32 )
OPCODE(ByteReverseHalf, T::U16, T::U16 ) OPCODE(ByteReverseHalf, T::U16, T::U16 )
OPCODE(ByteReverseDual, T::U64, T::U64 ) OPCODE(ByteReverseDual, T::U64, T::U64 )
OPCODE(CountLeadingZeros, T::U32, T::U32 ) OPCODE(CountLeadingZeros32, T::U32, T::U32 )
OPCODE(CountLeadingZeros64, T::U64, T::U64 )
// Saturated instructions // Saturated instructions
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 ) OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )