opcodes: Add 64-bit CountLeadingZeros opcode
This commit is contained in:
parent
4c4efb2213
commit
b612782445
4 changed files with 43 additions and 3 deletions
|
@ -1302,7 +1302,7 @@ void EmitX64<JST>::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename JST>
|
template <typename JST>
|
||||||
void EmitX64<JST>::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64<JST>::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
|
if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
|
||||||
Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
||||||
|
@ -1326,6 +1326,31 @@ void EmitX64<JST>::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename JST>
|
||||||
|
void EmitX64<JST>::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
|
||||||
|
Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64();
|
||||||
|
Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
|
||||||
|
|
||||||
|
code->lzcnt(result, source);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
} else {
|
||||||
|
Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64();
|
||||||
|
Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
|
||||||
|
|
||||||
|
// The result of a bsr of zero is undefined, but zf is set after it.
|
||||||
|
code->bsr(result, source);
|
||||||
|
code->mov(source.cvt32(), 0xFFFFFFFF);
|
||||||
|
code->cmovz(result.cvt32(), source.cvt32());
|
||||||
|
code->neg(result.cvt32());
|
||||||
|
code->add(result.cvt32(), 63);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename JST>
|
template <typename JST>
|
||||||
void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
|
@ -427,7 +427,19 @@ U64 IREmitter::ByteReverseDual(const U64& a) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 32-bit overload of CountLeadingZeros: builds a single IR instruction taking `a`
// and returns it as a U32. Both sides of the diff are shown below; the first
// return statement uses the previous opcode name (CountLeadingZeros), the second
// uses the renamed CountLeadingZeros32.
U32 IREmitter::CountLeadingZeros(const U32& a) {
|
U32 IREmitter::CountLeadingZeros(const U32& a) {
|
||||||
return Inst<U32>(Opcode::CountLeadingZeros, a);
|
return Inst<U32>(Opcode::CountLeadingZeros32, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 64-bit overload: emits a CountLeadingZeros64 IR instruction whose only
// argument is `a` and returns the resulting U64 value.
U64 IREmitter::CountLeadingZeros(const U64& a) {
    U64 leading_zeros = Inst<U64>(Opcode::CountLeadingZeros64, a);
    return leading_zeros;
}
|
||||||
|
|
||||||
|
// Width-generic overload: picks the opcode from the runtime type of `a`.
// A U32 operand produces a CountLeadingZeros32 instruction; any other type
// produces CountLeadingZeros64.
U32U64 IREmitter::CountLeadingZeros(const U32U64& a) {
    const bool operand_is_word = (a.GetType() == IR::Type::U32);

    if (!operand_is_word) {
        return Inst<U64>(Opcode::CountLeadingZeros64, a);
    }

    return Inst<U32>(Opcode::CountLeadingZeros32, a);
}
|
||||||
|
|
||||||
ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
|
ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
|
||||||
|
|
|
@ -139,6 +139,8 @@ public:
|
||||||
U16 ByteReverseHalf(const U16& a);
|
U16 ByteReverseHalf(const U16& a);
|
||||||
U64 ByteReverseDual(const U64& a);
|
U64 ByteReverseDual(const U64& a);
|
||||||
U32 CountLeadingZeros(const U32& a);
|
U32 CountLeadingZeros(const U32& a);
|
||||||
|
U64 CountLeadingZeros(const U64& a);
|
||||||
|
U32U64 CountLeadingZeros(const U32U64& a);
|
||||||
|
|
||||||
ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
|
ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
|
||||||
ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
|
ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
|
||||||
|
|
|
@ -115,7 +115,8 @@ OPCODE(ZeroExtendWordToLong, T::U64, T::U32
|
||||||
OPCODE(ByteReverseWord, T::U32, T::U32 )
|
OPCODE(ByteReverseWord, T::U32, T::U32 )
|
||||||
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
||||||
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
||||||
OPCODE(CountLeadingZeros, T::U32, T::U32 )
|
OPCODE(CountLeadingZeros32, T::U32, T::U32 )
|
||||||
|
OPCODE(CountLeadingZeros64, T::U64, T::U64 )
|
||||||
|
|
||||||
// Saturated instructions
|
// Saturated instructions
|
||||||
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
||||||
|
|
Loading…
Reference in a new issue