# Patch summary: introduces a zero-operand `ZeroVector` IR opcode (registered
# with a T::U128 type entry in opcodes.inc), its IREmitter wrapper and its x64
# emitter, then switches the `zero` operand of three A64 translator functions
# (CMGT_zero_2, CMEQ_zero_2, CMLT_2) over to it.
# NOTE(review): this copy was recovered from a whitespace-collapsed paste; line
# breaks, blank context lines and indentation are reconstructed -- confirm the
# context lines against the target tree before applying.

# x64 backend: adds EmitX64::EmitZeroVector. It takes a scratch XMM register
# from the register allocator, emits `pxor a, a` on it (a register XORed with
# itself has every bit cleared), and defines that register as the value of `inst`.
diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 0077cd2a..6058bdbc 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -1207,4 +1207,10 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) {
+    Xbyak::Xmm a = ctx.reg_alloc.ScratchXmm();
+    code.pxor(a, a);
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
 } // namespace Dynarmic::BackendX64

# A64 translator: in CMGT_zero_2, CMEQ_zero_2 and CMLT_2 the `zero` operand is
# now built with ir.ZeroVector() instead of ir.ZeroExtendToQuad(ir.Imm64(0)),
# which carried a "TODO: Optimize" marker. The comparison calls that consume
# `zero` (VectorGreaterSigned / VectorEqual / VectorLessSigned) are unchanged
# context lines.
diff --git a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
index f387145a..1b648ef3 100644
--- a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
+++ b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp
@@ -29,7 +29,7 @@ bool TranslatorVisitor::CMGT_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
     const size_t datasize = Q ? 128 : 64;
 
     const IR::U128 operand = V(datasize, Vn);
-    const IR::U128 zero = ir.ZeroExtendToQuad(ir.Imm64(0)); // TODO: Optimize
+    const IR::U128 zero = ir.ZeroVector();
     const IR::U128 result = ir.VectorGreaterSigned(esize, operand, zero);
     V(datasize, Vd, result);
     return true;
@@ -43,7 +43,7 @@ bool TranslatorVisitor::CMEQ_zero_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
     const size_t datasize = Q ? 128 : 64;
 
     const IR::U128 operand = V(datasize, Vn);
-    const IR::U128 zero = ir.ZeroExtendToQuad(ir.Imm64(0)); // TODO: Optimize
+    const IR::U128 zero = ir.ZeroVector();
     IR::U128 result = ir.VectorEqual(esize, operand, zero);
     if (datasize == 64) {
         result = ir.VectorZeroUpper(result);
@@ -60,7 +60,7 @@ bool TranslatorVisitor::CMLT_2(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
     const size_t datasize = Q ? 128 : 64;
 
     const IR::U128 operand = V(datasize, Vn);
-    const IR::U128 zero = ir.ZeroExtendToQuad(ir.Imm64(0)); // TODO: Optimize
+    const IR::U128 zero = ir.ZeroVector();
     const IR::U128 result = ir.VectorLessSigned(esize, operand, zero);
     V(datasize, Vd, result);
     return true;

# IR emitter (definition): adds IREmitter::ZeroVector(), which returns
# Inst(Opcode::ZeroVector) and passes no operands.
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 280ed6c5..2dedf077 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1137,6 +1137,10 @@ U128 IREmitter::VectorZeroUpper(const U128& a) {
     return Inst(Opcode::VectorZeroUpper, a);
 }
 
+U128 IREmitter::ZeroVector() {
+    return Inst(Opcode::ZeroVector);
+}
+
 U32U64 IREmitter::FPAbs(const U32U64& a) {
     if (a.GetType() == Type::U32) {
         return Inst(Opcode::FPAbs32, a);

# IR emitter (declaration): declares `U128 ZeroVector();` directly after
# VectorZeroUpper in the public section of the class.
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index fee61b15..3ad414e9 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -241,6 +241,7 @@ public:
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
     U128 VectorZeroExtend(size_t original_esize, const U128& a);
     U128 VectorZeroUpper(const U128& a);
+    U128 ZeroVector();
 
     U32U64 FPAbs(const U32U64& a);
     U32U64 FPAdd(const U32U64& a, const U32U64& b, bool fpscr_controlled);

# Opcode table: registers ZeroVector with T::U128 in the first type column and
# nothing after it (no operand types listed).
# NOTE(review): the column padding inside the OPCODE(...) rows is reconstructed;
# the original alignment was lost -- verify against opcodes.inc.
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 642c3b44..c8f007d7 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -288,6 +288,7 @@ OPCODE(VectorZeroExtend16,      T::U128,        T::U128
 OPCODE(VectorZeroExtend32,      T::U128,        T::U128                                         )
 OPCODE(VectorZeroExtend64,      T::U128,        T::U128                                         )
 OPCODE(VectorZeroUpper,         T::U128,        T::U128                                         )
+OPCODE(ZeroVector,              T::U128,                                                        )
 
 // Floating-point operations
 OPCODE(FPAbs32,                 T::U32,         T::U32                                          )