diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index 3067500a..8dff0d23 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -187,6 +187,14 @@ void EmitX64::EmitFPVectorAdd64(EmitContext& ctx, IR::Inst* inst) { EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::addpd); } +void EmitX64::EmitFPVectorDiv32(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::divps); +} + +void EmitX64::EmitFPVectorDiv64(EmitContext& ctx, IR::Inst* inst) { + EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::divpd); +} + void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) { EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps); } diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 0c8eaafb..ca2125be 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -770,7 +770,7 @@ INST(MLS_vec, "MLS (vector)", "0Q101 //INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd") //INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd") //INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd") -//INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") +INST(FDIV_2, "FDIV (vector)", "0Q1011100z1mmmmm111111nnnnnddddd") INST(EOR_asimd, "EOR (vector)", "0Q101110001mmmmm000111nnnnnddddd") INST(BSL, "BSL", "0Q101110011mmmmm000111nnnnnddddd") //INST(FMINNMP_vec_2, "FMINNMP (vector)", "0Q1011101z1mmmmm110001nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp index ca81197b..f97cd6fc 100644 --- a/src/frontend/A64/translate/impl/simd_three_same.cpp +++ b/src/frontend/A64/translate/impl/simd_three_same.cpp @@ -348,6 +348,23 @@ bool TranslatorVisitor::EOR_asimd(bool Q, Vec Vm, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::FDIV_2(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) { + if (sz && !Q) { + return ReservedValue(); + } + const size_t esize = sz ? 64 : 32; + const size_t datasize = Q ? 128 : 64; + + const IR::U128 operand1 = V(datasize, Vn); + const IR::U128 operand2 = V(datasize, Vm); + IR::U128 result = ir.FPVectorDiv(esize, operand1, operand2); + if (datasize == 64) { + result = ir.VectorZeroUpper(result); + } + V(datasize, Vd, result); + return true; +} + bool TranslatorVisitor::BIF(bool Q, Vec Vm, Vec Vn, Vec Vd) { const size_t datasize = Q ? 128 : 64; diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index bd3020b2..92cbe141 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1331,6 +1331,17 @@ U128 IREmitter::FPVectorAdd(size_t esize, const U128& a, const U128& b) { return {}; } +U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 32: + return Inst(Opcode::FPVectorDiv32, a, b); + case 64: + return Inst(Opcode::FPVectorDiv64, a, b); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) { switch (esize) { case 32: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index b315047a..5adb708e 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -268,6 +268,7 @@ public: U64 FPU32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled); U128 FPVectorAdd(size_t esize, const U128& a, const U128& b); + U128 FPVectorDiv(size_t esize, const U128& a, const U128& b); U128 FPVectorSub(size_t esize, const U128& a, const U128& b); void Breakpoint(); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index f471ffa9..652eca56 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -343,6 +343,8 @@ OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 // Floating-point vector instructions OPCODE(FPVectorAdd32, T::U128, T::U128, T::U128 ) OPCODE(FPVectorAdd64, T::U128, T::U128, T::U128 ) +OPCODE(FPVectorDiv32, T::U128, T::U128, T::U128 ) +OPCODE(FPVectorDiv64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 ) OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )