Add A64 FMUL (vector) support.

Summary of the changes carried by this patch:
  * x64 backend: EmitFPVectorMul32 / EmitFPVectorMul64 forward to the shared
    EmitVectorOperation32 / EmitVectorOperation64 helpers with mulps / mulpd.
  * A64 decoder table: the FMUL_vec_2 entry is uncommented (enabled).
  * A64 translator: FMUL_vec_2 rejects sz && !Q as a reserved value, otherwise
    reads Vn and Vm, emits FPVectorMul and writes the result to Vd.
  * IR: IREmitter::FPVectorMul selects the FPVectorMul32 / FPVectorMul64
    opcode by element size; both opcodes are added to opcodes.inc.

diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index eea98ded..d92e1134 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -195,6 +195,14 @@ void EmitX64::EmitFPVectorDiv64(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::divpd);
 }
 
+void EmitX64::EmitFPVectorMul32(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::mulps);
+}
+
+void EmitX64::EmitFPVectorMul64(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::mulpd);
+}
+
 void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
 }
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index cd9af850..aa5c4923 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -766,7 +766,7 @@ INST(MLS_vec, "MLS (vector)", "0Q101
 //INST(FMAXNMP_vec_2, "FMAXNMP (vector)", "0Q1011100z1mmmmm110001nnnnnddddd")
 //INST(FMLAL_vec_2, "FMLAL, FMLAL2 (vector)", "0Q1011100z1mmmmm110011nnnnnddddd")
 //INST(FADDP_vec_2, "FADDP (vector)", "0Q1011100z1mmmmm110101nnnnnddddd")
-//INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd")
+INST(FMUL_vec_2, "FMUL (vector)", "0Q1011100z1mmmmm110111nnnnnddddd")
 //INST(FCMGE_reg_4, "FCMGE (register)", "0Q1011100z1mmmmm111001nnnnnddddd")
 //INST(FACGE_4, "FACGE", "0Q1011100z1mmmmm111011nnnnnddddd")
 //INST(FMAXP_vec_2, "FMAXP (vector)", "0Q1011100z1mmmmm111101nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp
index 5d16e8a0..71b6130e 100644
--- a/src/frontend/A64/translate/impl/simd_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_three_same.cpp
@@ -383,6 +383,20 @@ bool TranslatorVisitor::EOR_asimd(bool Q, Vec Vm, Vec Vn, Vec Vd) {
     return true;
 }
 
+bool TranslatorVisitor::FMUL_vec_2(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) {
+    if (sz && !Q) {
+        return ReservedValue();
+    }
+    const size_t esize = sz ? 64 : 32;
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 operand1 = V(datasize, Vn);
+    const IR::U128 operand2 = V(datasize, Vm);
+    IR::U128 result = ir.FPVectorMul(esize, operand1, operand2);
+    V(datasize, Vd, result);
+    return true;
+}
+
 bool TranslatorVisitor::FDIV_2(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) {
     if (sz && !Q) {
         return ReservedValue();
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index c2955dfe..5a6173bf 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1382,6 +1382,17 @@ U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b) {
     return {};
 }
 
+U128 IREmitter::FPVectorMul(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 32:
+        return Inst(Opcode::FPVectorMul32, a, b);
+    case 64:
+        return Inst(Opcode::FPVectorMul64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 32:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 73493370..783d4ea5 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -274,6 +274,7 @@ public:
 
     U128 FPVectorAdd(size_t esize, const U128& a, const U128& b);
     U128 FPVectorDiv(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
     U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
 
     void Breakpoint();
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index ffef242a..8eb1498c 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -354,6 +354,8 @@ OPCODE(FPVectorAdd32, T::U128, T::U128, T::U
 OPCODE(FPVectorAdd64, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorDiv32, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorDiv64, T::U128, T::U128, T::U128 )
+OPCODE(FPVectorMul32, T::U128, T::U128, T::U128 )
+OPCODE(FPVectorMul64, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )
 OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )
 