From e0c12ec2ad5a4944f3cd78d04cc30c421a569573 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Thu, 25 Jan 2018 20:57:56 -0400
Subject: [PATCH] A64: Implemented EOR (vector), ORR (vector, register) and
 ORN (vector) Instructions (#142)

---
 src/backend_x64/emit_x64_vector.cpp             | 20 ++++++++++
 src/frontend/A64/decoder/a64.inc                |  6 +--
 .../A64/translate/impl/simd_three_same.cpp      | 40 +++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp                  | 12 ++++++
 src/frontend/ir/ir_emitter.h                    |  3 ++
 src/frontend/ir/opcodes.inc                     |  3 ++
 6 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index d3ef22bf..3dd7f150 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -121,6 +121,26 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand);
 }
 
+void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::por);
+}
+
+void EmitX64::EmitVectorEor(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pxor);
+}
+
+void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm();
+
+    code->pcmpeqw(xmm_b, xmm_b);
+    code->pxor(xmm_a, xmm_b);
+
+    ctx.reg_alloc.DefineValue(inst, xmm_a);
+}
+
 void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index ade3833f..985cd5b0 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -836,8 +836,8 @@ INST(AND_asimd,              "AND (vector)",                             "0Q001
 //INST(BIC_asimd_reg,        "BIC (vector, register)",                   "0Q001110011mmmmm000111nnnnnddddd")
 //INST(FMLSL_vec_1,          "FMLSL, FMLSL2 (vector)",                   "0Q0011101z1mmmmm111011nnnnnddddd")
 //INST(FMLSL_vec_2,          "FMLSL, FMLSL2 (vector)",                   "0Q1011101z1mmmmm110011nnnnnddddd")
-//INST(ORR_asimd_reg,        "ORR (vector, register)",                   "0Q001110101mmmmm000111nnnnnddddd")
-//INST(ORN_asimd,            "ORN (vector)",                             "0Q001110111mmmmm000111nnnnnddddd")
+INST(ORR_asimd_reg,          "ORR (vector, register)",                   "0Q001110101mmmmm000111nnnnnddddd")
+INST(ORN_asimd,              "ORN (vector)",                             "0Q001110111mmmmm000111nnnnnddddd")
 //INST(UHADD,                "UHADD",                                    "0Q101110zz1mmmmm000001nnnnnddddd")
 //INST(URHADD,               "URHADD",                                   "0Q101110zz1mmmmm000101nnnnnddddd")
 //INST(UHSUB,                "UHSUB",                                    "0Q101110zz1mmmmm001001nnnnnddddd")
@@ -849,7 +849,7 @@ INST(AND_asimd,              "AND (vector)",                             "0Q001
 //INST(PMUL,                 "PMUL",                                     "0Q101110zz1mmmmm100111nnnnnddddd")
 //INST(UMAXP,                "UMAXP",                                    "0Q101110zz1mmmmm101001nnnnnddddd")
 //INST(UMINP,                "UMINP",                                    "0Q101110zz1mmmmm101011nnnnnddddd")
-//INST(EOR_asimd,            "EOR (vector)",                             "0Q101110001mmmmm000111nnnnnddddd")
+INST(EOR_asimd,              "EOR (vector)",                             "0Q101110001mmmmm000111nnnnnddddd")
 //INST(BSL,                  "BSL",                                      "0Q101110011mmmmm000111nnnnnddddd")
 //INST(BIT,                  "BIT",                                      "0Q101110101mmmmm000111nnnnnddddd")
 //INST(BIF,                  "BIF",                                      "0Q101110111mmmmm000111nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp
index 5e2f76ae..2caf4788 100644
--- a/src/frontend/A64/translate/impl/simd_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_three_same.cpp
@@ -74,5 +74,45 @@ bool TranslatorVisitor::AND_asimd(bool Q, Vec Vm, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::ORR_asimd_reg(bool Q, Vec Vm, Vec Vn, Vec Vd) {
+    const size_t datasize = Q ? 128 : 64;
+
+    auto operand1 = V(datasize, Vn);
+    auto operand2 = V(datasize, Vm);
+
+    auto result = ir.VectorOr(operand1, operand2);
+
+    V(datasize, Vd, result);
+
+    return true;
+}
+
+bool TranslatorVisitor::ORN_asimd(bool Q, Vec Vm, Vec Vn, Vec Vd) {
+    const size_t datasize = Q ? 128 : 64;
+
+    auto operand1 = V(datasize, Vn);
+    auto operand2 = V(datasize, Vm);
+
+    // TODO: This does not zero the upper 64 bits when datasize == 64. This may break future optimization passes.
+    auto result = ir.VectorOr(operand1, ir.VectorNot(operand2));
+
+    V(datasize, Vd, result);
+
+    return true;
+}
+
+bool TranslatorVisitor::EOR_asimd(bool Q, Vec Vm, Vec Vn, Vec Vd) {
+    const size_t datasize = Q ? 128 : 64;
+
+    auto operand1 = V(datasize, Vn);
+    auto operand2 = V(datasize, Vm);
+
+    auto result = ir.VectorEor(operand1, operand2);
+
+    V(datasize, Vd, result);
+
+    return true;
+}
+
 } // namespace A64
 } // namespace Dynarmic
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 0f5f9ec4..60fef7c0 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -705,6 +705,18 @@ U128 IREmitter::VectorAnd(const U128& a, const U128& b) {
     return Inst(Opcode::VectorAnd, a, b);
 }
 
+U128 IREmitter::VectorOr(const U128& a, const U128& b) {
+    return Inst(Opcode::VectorOr, a, b);
+}
+
+U128 IREmitter::VectorEor(const U128& a, const U128& b) {
+    return Inst(Opcode::VectorEor, a, b);
+}
+
+U128 IREmitter::VectorNot(const U128& a) {
+    return Inst(Opcode::VectorNot, a);
+}
+
 U128 IREmitter::VectorLowerBroadcast8(const U8& a) {
     return Inst(Opcode::VectorLowerBroadcast8, a);
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 65fb871c..f1b116c1 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -190,6 +190,9 @@ public:
     U128 VectorAdd32(const U128& a, const U128& b);
     U128 VectorAdd64(const U128& a, const U128& b);
     U128 VectorAnd(const U128& a, const U128& b);
+    U128 VectorOr(const U128& a, const U128& b);
+    U128 VectorEor(const U128& a, const U128& b);
+    U128 VectorNot(const U128& a);
     U128 VectorLowerBroadcast8(const U8& a);
     U128 VectorLowerBroadcast16(const U16& a);
     U128 VectorLowerBroadcast32(const U32& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 67ef1569..b2218159 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -175,6 +175,9 @@ OPCODE(VectorAdd16,            T::U128,      T::U128, T::U128
 OPCODE(VectorAdd32,            T::U128,      T::U128, T::U128                )
 OPCODE(VectorAdd64,            T::U128,      T::U128, T::U128                )
 OPCODE(VectorAnd,              T::U128,      T::U128, T::U128                )
+OPCODE(VectorOr,               T::U128,      T::U128, T::U128                )
+OPCODE(VectorEor,              T::U128,      T::U128, T::U128                )
+OPCODE(VectorNot,              T::U128,      T::U128                         )
 OPCODE(VectorLowerBroadcast8,  T::U128,      T::U8                           )
 OPCODE(VectorLowerBroadcast16, T::U128,      T::U16                          )
 OPCODE(VectorLowerBroadcast32, T::U128,      T::U32                          )
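
Note (not part of the patch): the EmitVectorNot lowering relies on a classic
x64 idiom: pcmpeqw of a register against itself compares equal in every lane
and so produces an all-ones mask, and the following pxor against that mask
flips every bit of the operand. The translator then composes ORN as
ir.VectorOr(operand1, ir.VectorNot(operand2)). Below is a minimal standalone
sketch of the same semantics, written with SSE2 intrinsics rather than
Xbyak-emitted code; the helper names are illustrative only and are not
dynarmic API.

// Sketch of the semantics behind the new VectorOr/VectorEor/VectorNot
// opcodes. Assumes an SSE2-capable x64 compiler; not part of this patch.
#include <cstdint>
#include <cstdio>
#include <emmintrin.h>

static __m128i vector_or(__m128i a, __m128i b)  { return _mm_or_si128(a, b);  }  // por
static __m128i vector_eor(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }  // pxor

static __m128i vector_not(__m128i a) {
    // pcmpeqw of a value with itself is true in every 16-bit lane, yielding
    // an all-ones mask; xor against it flips every bit (the EmitVectorNot trick).
    const __m128i all_ones = _mm_cmpeq_epi16(a, a);
    return _mm_xor_si128(a, all_ones);
}

int main() {
    alignas(16) const std::uint64_t n[2] = {0x00FF00FF00FF00FFull, 0x0123456789ABCDEFull};
    alignas(16) const std::uint64_t m[2] = {0x0F0F0F0F0F0F0F0Full, 0xFFFF0000FFFF0000ull};
    alignas(16) std::uint64_t orn[2], eor[2];

    const __m128i vn = _mm_load_si128(reinterpret_cast<const __m128i*>(n));
    const __m128i vm = _mm_load_si128(reinterpret_cast<const __m128i*>(m));

    // ORN: Vd = Vn | ~Vm, mirroring ir.VectorOr(operand1, ir.VectorNot(operand2)).
    _mm_store_si128(reinterpret_cast<__m128i*>(orn), vector_or(vn, vector_not(vm)));
    // EOR: Vd = Vn ^ Vm.
    _mm_store_si128(reinterpret_cast<__m128i*>(eor), vector_eor(vn, vm));

    for (int i = 0; i < 2; ++i) {
        std::printf("lane %d: orn=%016llx (ref %016llx) eor=%016llx (ref %016llx)\n", i,
                    static_cast<unsigned long long>(orn[i]),
                    static_cast<unsigned long long>(n[i] | ~m[i]),
                    static_cast<unsigned long long>(eor[i]),
                    static_cast<unsigned long long>(n[i] ^ m[i]));
    }
    return 0;
}

Built with any SSE2-capable compiler (e.g. c++ -O2 sketch.cpp), each printed
lane matches its scalar reference, which is exactly the Vd = Vn | ~Vm and
Vd = Vn ^ Vm behaviour the new translator functions implement. Synthesizing
the all-ones mask in a register this way is also why the backend needs no
constant load from memory for NOT.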