From b612782445b7f411f8f38db4ebe27516d2ac99a5 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 22 Jan 2018 10:51:40 -0500 Subject: [PATCH] opcodes: Add 64-bit CountLeadingZeroes opcode --- src/backend_x64/emit_x64.cpp | 27 ++++++++++++++++++++++++++- src/frontend/ir/ir_emitter.cpp | 14 +++++++++++++- src/frontend/ir/ir_emitter.h | 2 ++ src/frontend/ir/opcodes.inc | 3 ++- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 4a2bbb00..f14772f4 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -1302,7 +1302,7 @@ void EmitX64::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) { } template -void EmitX64::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32(); @@ -1326,6 +1326,31 @@ void EmitX64::EmitCountLeadingZeros(EmitContext& ctx, IR::Inst* inst) { } } +template +void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (code->DoesCpuSupport(Xbyak::util::Cpu::tLZCNT)) { + Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64(); + Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); + + code->lzcnt(result, source); + + ctx.reg_alloc.DefineValue(inst, result); + } else { + Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64(); + Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); + + // The result of a bsr of zero is undefined, but zf is set after it. + code->bsr(result, source); + code->mov(source.cvt32(), 0xFFFFFFFF); + code->cmovz(result.cvt32(), source.cvt32()); + code->neg(result.cvt32()); + code->add(result.cvt32(), 63); + + ctx.reg_alloc.DefineValue(inst, result); + } +} + template void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) { auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp); diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 7e9cd23c..6355e2be 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -427,7 +427,19 @@ U64 IREmitter::ByteReverseDual(const U64& a) { } U32 IREmitter::CountLeadingZeros(const U32& a) { - return Inst(Opcode::CountLeadingZeros, a); + return Inst(Opcode::CountLeadingZeros32, a); +} + +U64 IREmitter::CountLeadingZeros(const U64& a) { + return Inst(Opcode::CountLeadingZeros64, a); +} + +U32U64 IREmitter::CountLeadingZeros(const U32U64& a) { + if (a.GetType() == IR::Type::U32) { + return Inst(Opcode::CountLeadingZeros32, a); + } + + return Inst(Opcode::CountLeadingZeros64, a); } ResultAndOverflow IREmitter::SignedSaturatedAdd(const U32& a, const U32& b) { diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index b69296bd..24166cfe 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -139,6 +139,8 @@ public: U16 ByteReverseHalf(const U16& a); U64 ByteReverseDual(const U64& a); U32 CountLeadingZeros(const U32& a); + U64 CountLeadingZeros(const U64& a); + U32U64 CountLeadingZeros(const U32U64& a); ResultAndOverflow SignedSaturatedAdd(const U32& a, const U32& b); ResultAndOverflow SignedSaturatedSub(const U32& a, const U32& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index ef46afec..1575b91c 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -115,7 +115,8 @@ OPCODE(ZeroExtendWordToLong, T::U64, T::U32 OPCODE(ByteReverseWord, T::U32, T::U32 ) OPCODE(ByteReverseHalf, T::U16, T::U16 ) OPCODE(ByteReverseDual, T::U64, T::U64 ) -OPCODE(CountLeadingZeros, T::U32, T::U32 ) +OPCODE(CountLeadingZeros32, T::U32, T::U32 ) +OPCODE(CountLeadingZeros64, T::U64, T::U64 ) // Saturated instructions OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )