diff --git a/src/backend/x64/emit_x64_data_processing.cpp b/src/backend/x64/emit_x64_data_processing.cpp
index 7959693a..619c95f8 100644
--- a/src/backend/x64/emit_x64_data_processing.cpp
+++ b/src/backend/x64/emit_x64_data_processing.cpp
@@ -222,6 +222,38 @@ void EmitX64::EmitExtractRegister64(Dynarmic::Backend::X64::EmitContext& ctx, IR
     EmitExtractRegister(code, ctx, inst, 64);
 }
 
+static void EmitReplicateBit(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit_size) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const u8 bit = args[1].GetImmediateU8();
+
+    if (bit == bit_size - 1) {
+        const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bit_size);
+
+        code.sar(result, bit_size - 1);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+        return;
+    }
+
+    const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bit_size);
+    const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr().changeBit(bit_size);
+
+    code.xor_(result, result);
+    code.bt(value, bit);
+    code.sbb(result, result);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitX64::EmitReplicateBit32(Dynarmic::Backend::X64::EmitContext& ctx, IR::Inst* inst) {
+    EmitReplicateBit(code, ctx, inst, 32);
+}
+
+void EmitX64::EmitReplicateBit64(Dynarmic::Backend::X64::EmitContext& ctx, IR::Inst* inst) {
+    EmitReplicateBit(code, ctx, inst, 64);
+}
+
 void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
     const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
 
diff --git a/src/frontend/A64/translate/impl/data_processing_bitfield.cpp b/src/frontend/A64/translate/impl/data_processing_bitfield.cpp
index ca19e8fa..5357bb62 100644
--- a/src/frontend/A64/translate/impl/data_processing_bitfield.cpp
+++ b/src/frontend/A64/translate/impl/data_processing_bitfield.cpp
@@ -8,12 +8,6 @@
 
 namespace Dynarmic::A64 {
 
-static IR::U32U64 ReplicateBit(IREmitter& ir, const IR::U32U64& value, u8 bit_position_to_replicate) {
-    const u8 datasize = value.GetType() == IR::Type::U64 ? 64 : 32;
-    const auto bit = ir.LogicalShiftLeft(value, ir.Imm8(datasize - 1 - bit_position_to_replicate));
-    return ir.ArithmeticShiftRight(bit, ir.Imm8(datasize - 1));
-}
-
 bool TranslatorVisitor::SBFM(bool sf, bool N, Imm<6> immr, Imm<6> imms, Reg Rn, Reg Rd) {
     if (sf && !N) {
         return ReservedValue();
@@ -34,7 +28,7 @@ bool TranslatorVisitor::SBFM(bool sf, bool N, Imm<6> immr, Imm<6> imms, Reg Rn,
     const auto src = X(datasize, Rn);
 
     auto bot = ir.And(ir.RotateRight(src, ir.Imm8(R)), I(datasize, masks->wmask));
-    auto top = ReplicateBit(ir, src, S);
+    auto top = ir.ReplicateBit(src, S);
 
     top = ir.And(top, I(datasize, ~masks->tmask));
     bot = ir.And(bot, I(datasize, masks->tmask));
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index c00d3758..70121fa0 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -469,6 +469,16 @@ U32U64 IREmitter::ExtractRegister(const U32U64& a, const U32U64& b, const U8& ls
     return Inst(Opcode::ExtractRegister64, a, b, lsb);
 }
 
+U32U64 IREmitter::ReplicateBit(const U32U64& a, u8 bit) {
+    if (a.GetType() == IR::Type::U32) {
+        ASSERT(bit < 32);
+        return Inst(Opcode::ReplicateBit32, a, Imm8(bit));
+    }
+
+    ASSERT(bit < 64);
+    return Inst(Opcode::ReplicateBit64, a, Imm8(bit));
+}
+
 U32U64 IREmitter::MaxSigned(const U32U64& a, const U32U64& b) {
     if (a.GetType() == IR::Type::U32) {
         return Inst(Opcode::MaxSigned32, a, b);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index f984d039..02768920 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -152,6 +152,7 @@ public:
     U64 ByteReverseDual(const U64& a);
     U32U64 CountLeadingZeros(const U32U64& a);
    U32U64 ExtractRegister(const U32U64& a, const U32U64& b, const U8& lsb);
+    U32U64 ReplicateBit(const U32U64& a, u8 bit);
     U32U64 MaxSigned(const U32U64& a, const U32U64& b);
     U32U64 MaxUnsigned(const U32U64& a, const U32U64& b);
     U32U64 MinSigned(const U32U64& a, const U32U64& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index a2a90a49..3a101084 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -159,6 +159,8 @@ OPCODE(CountLeadingZeros32,                                 U32,            U32
 OPCODE(CountLeadingZeros64,                                 U64,            U64                                                             )
 OPCODE(ExtractRegister32,                                   U32,            U32,            U32,            U8                              )
 OPCODE(ExtractRegister64,                                   U64,            U64,            U64,            U8                              )
+OPCODE(ReplicateBit32,                                      U32,            U32,            U8                                              )
+OPCODE(ReplicateBit64,                                      U64,            U64,            U8                                              )
 OPCODE(MaxSigned32,                                         U32,            U32,            U32                                             )
 OPCODE(MaxSigned64,                                         U64,            U64,            U64                                             )
 OPCODE(MaxUnsigned32,                                       U32,            U32,            U32                                             )
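For reference, the semantics shared by the removed front-end helper and the new ReplicateBit32/64 opcodes can be sketched in standalone C++; the sign bit case is what EmitReplicateBit lowers to a single sar, and the general case maps to the xor/bt/sbb sequence. This sketch is illustrative only and is not part of the patch; the function name and test values are made up.

// ReplicateBit semantics sketch (not dynarmic code).
#include <cassert>
#include <cstdint>
#include <cstdio>

// Broadcast bit `bit` of `value` across the whole result:
// all-ones if the bit is set, zero otherwise.
static std::uint64_t ReplicateBit64(std::uint64_t value, unsigned bit) {
    assert(bit < 64);
    // Old IR helper: shift the bit up to the sign position, then arithmetic-shift right.
    // x64 backend: bt copies the selected bit into CF, then sbb r, r computes -CF,
    // i.e. 0xFFFF...FF when the bit is set and 0 otherwise.
    return ((value >> bit) & 1) ? ~std::uint64_t{0} : std::uint64_t{0};
}

int main() {
    // Bit 31 set -> all ones; bit 63 clear -> zero.
    std::printf("%016llx\n", static_cast<unsigned long long>(ReplicateBit64(0x80000000u, 31)));
    std::printf("%016llx\n", static_cast<unsigned long long>(ReplicateBit64(0x80000000u, 63)));
    return 0;
}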