opcodes: Add 64-bit CountLeadingZeros opcode

2018-01-22 10:51:40 -05:00 · 2018-01-22 10:51:40 -05:00 · b612782445
commit b612782445
parent 4c4efb2213
4 changed files with 43 additions and 3 deletions
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@ -1302,7 +1302,7 @@ void EmitX64<JST>::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) {
 }
 template <typename JST>
-void EmitX64<JST>::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64<JST>::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
        Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32();
@ -1326,6 +1326,31 @@ void EmitX64<JST>::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) {
    }
 }
 // Emits x64 code that counts the leading zero bits of the 64-bit operand of
 // `inst` and binds the register holding the count as the value of `inst`.
 // Uses LZCNT when the host CPU reports support for it, otherwise a BSR-based
 // sequence.
 template <typename JST>
 void EmitX64<JST>::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
   auto args = ctx.reg_alloc.GetArgumentInfo(inst);
   if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) {
       // Fast path: a single 64-bit LZCNT produces the count directly.
       Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64();
       Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
       code->lzcnt(result, source);
       ctx.reg_alloc.DefineValue(inst, result);
   } else {
       // Fallback path. `source` is overwritten below, so it is requested as a
       // scratch register rather than a read-only one.
       Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64();
       Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
       // BSR yields the index of the highest set bit. Its result is undefined
       // for a zero input, but ZF is set in that case.
       code->bsr(result, source);
       // MOV does not modify flags, so ZF from the BSR is still live for the
       // CMOVZ; a zero input therefore turns the index into -1.
       code->mov(source.cvt32(), 0xFFFFFFFF);
       code->cmovz(result.cvt32(), source.cvt32());
       // count = 63 - index (a zero input gives 63 - (-1) = 64). The 32-bit
       // operations also clear the upper half of the 64-bit result register.
       code->neg(result.cvt32());
       code->add(result.cvt32(), 63);
       ctx.reg_alloc.DefineValue(inst, result);
   }
 }
 template <typename JST>
 void EmitX64<JST>::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -427,7 +427,19 @@ U64 IREmitter::ByteReverseDual(const U64& a) {
 }
 // 32-bit overload: emits the 32-bit count-leading-zeros IR instruction on `a`.
 U32 IREmitter::CountLeadingZeros(const U32& a) {
-    return Inst<U32>(Opcode::CountLeadingZeros, a);
+    return Inst<U32>(Opcode::CountLeadingZeros32, a);
 }
 // 64-bit overload: emits a CountLeadingZeros64 IR instruction on `a` and
 // returns its U64 result.
 U64 IREmitter::CountLeadingZeros(const U64& a) {
    auto leading_zeros = Inst<U64>(Opcode::CountLeadingZeros64, a);
    return leading_zeros;
 }
 // Width-generic overload: picks the 32-bit or 64-bit count-leading-zeros
 // opcode from the runtime type of `a`.
 U32U64 IREmitter::CountLeadingZeros(const U32U64& a) {
    const bool is_word = a.GetType() == IR::Type::U32;
    if (!is_word) {
        // Anything that is not a U32 is handled by the 64-bit opcode.
        return Inst<U64>(Opcode::CountLeadingZeros64, a);
    }
    return Inst<U32>(Opcode::CountLeadingZeros32, a);
 }
 ResultAndOverflow<U32> IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) {
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -139,6 +139,8 @@ public:
    U16 ByteReverseHalf(const U16& a);
    U64 ByteReverseDual(const U64& a);
    // Count leading zeros of `a`. The U32 and U64 overloads emit the
    // CountLeadingZeros32 and CountLeadingZeros64 opcodes respectively; the
    // U32U64 overload chooses between them based on the operand's type.
    U32 CountLeadingZeros(const U32& a);
    U64 CountLeadingZeros(const U64& a);
    U32U64 CountLeadingZeros(const U32U64& a);
    ResultAndOverflow<U32> SignedSaturatedAdd(const U32& a, const U32& b);
    ResultAndOverflow<U32> SignedSaturatedSub(const U32& a, const U32& b);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -115,7 +115,8 @@ OPCODE(ZeroExtendWordToLong,    T::U64,         T::U32
 OPCODE(ByteReverseWord,         T::U32,         T::U32                                          )
 OPCODE(ByteReverseHalf,         T::U16,         T::U16                                          )
 OPCODE(ByteReverseDual,         T::U64,         T::U64                                          )
-OPCODE(CountLeadingZeros,       T::U32,         T::U32                                          )
+OPCODE(CountLeadingZeros32,     T::U32,         T::U32                                          )
 OPCODE(CountLeadingZeros64,     T::U64,         T::U64                                          )
 // Saturated instructions
 OPCODE(SignedSaturatedAdd,      T::U32,         T::U32,         T::U32                          )