A32: Implement ASIMD VPADD (floating-point)
This commit is contained in:
parent
e006f0a205
commit
f58e247ef3
8 changed files with 27 additions and 17 deletions
|
@ -1117,15 +1117,15 @@ void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Forwards Xbyak's haddps to EmitThreeOpVectorOperation, instantiated for
// 32-bit elements with PairedIndexer. Both sides of the diff are shown; the
// second line of each pair additionally passes
// FpcrControlledArgument::Present as a template argument.
void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitThreeOpVectorOperation<32, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
|
EmitThreeOpVectorOperation<32, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Forwards Xbyak's haddpd to EmitThreeOpVectorOperation, instantiated for
// 64-bit elements with PairedIndexer. Both sides of the diff are shown; the
// second line of each pair additionally passes
// FpcrControlledArgument::Present as a template argument.
void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitThreeOpVectorOperation<64, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
|
EmitThreeOpVectorOperation<64, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
|
EmitThreeOpVectorOperation<32, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
|
||||||
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
||||||
code.xorps(zero, zero);
|
code.xorps(zero, zero);
|
||||||
code.punpcklqdq(result, xmm_b);
|
code.punpcklqdq(result, xmm_b);
|
||||||
|
@ -1134,7 +1134,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
|
EmitThreeOpVectorOperation<64, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
|
||||||
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
||||||
code.xorps(zero, zero);
|
code.xorps(zero, zero);
|
||||||
code.punpcklqdq(result, xmm_b);
|
code.punpcklqdq(result, xmm_b);
|
||||||
|
|
|
@ -35,7 +35,7 @@ INST(asimd_VMUL, "VMUL", "1111001P0Dzznnnndddd100
|
||||||
//INST(asimd_VFMA, "VFMA/VFMS", "111100100-CC--------1100---1----") // ASIMD
|
//INST(asimd_VFMA, "VFMA/VFMS", "111100100-CC--------1100---1----") // ASIMD
|
||||||
INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD
|
INST(asimd_VADD_float, "VADD (floating-point)", "111100100D0znnnndddd1101NQM0mmmm") // ASIMD
|
||||||
INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD
|
INST(asimd_VSUB_float, "VSUB (floating-point)", "111100100D1znnnndddd1101NQM0mmmm") // ASIMD
|
||||||
//INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110-0C--------1101---0----") // ASIMD
|
INST(asimd_VPADD_float, "VPADD (floating-point)", "111100110D0znnnndddd1101NQM0mmmm") // ASIMD
|
||||||
//INST(asimd_VABD_float, "VABD (floating-point)", "111100110-1C--------1101---0----") // ASIMD
|
//INST(asimd_VABD_float, "VABD (floating-point)", "111100110-1C--------1101---0----") // ASIMD
|
||||||
//INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100-CC--------1101---1----") // ASIMD
|
//INST(asimd_VMLA_float, "VMLA (floating-point)", "111100100-CC--------1101---1----") // ASIMD
|
||||||
INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD
|
INST(asimd_VMUL_float, "VMUL (floating-point)", "111100110D0znnnndddd1101NQM1mmmm") // ASIMD
|
||||||
|
|
|
@ -368,6 +368,13 @@ bool ArmTranslatorVisitor::asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t V
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
||||||
|
return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this, Q](const auto&, const auto& reg_n, const auto& reg_m) {
|
||||||
|
return Q ? ir.FPVectorPairedAdd(32, reg_n, reg_m, false)
|
||||||
|
: ir.FPVectorPairedAddLower(32, reg_n, reg_m, false);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
bool ArmTranslatorVisitor::asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) {
|
||||||
return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this](const auto&, const auto& reg_n, const auto& reg_m) {
|
return FloatingPointInstruction(*this, D, sz, Vn, Vd, N, Q, M, Vm, [this](const auto&, const auto& reg_n, const auto& reg_m) {
|
||||||
return ir.FPVectorMul(32, reg_n, reg_m, false);
|
return ir.FPVectorMul(32, reg_n, reg_m, false);
|
||||||
|
|
|
@ -464,6 +464,7 @@ struct ArmTranslatorVisitor final {
|
||||||
bool asimd_VMUL(bool P, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VMUL(bool P, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
bool asimd_VADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
bool asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VSUB_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
|
bool asimd_VPADD_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
bool asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VMUL_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
bool asimd_VMAX_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VMAX_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
bool asimd_VMIN_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VMIN_float(bool D, bool sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
|
|
|
@ -2420,22 +2420,22 @@ U128 IREmitter::FPVectorNeg(size_t esize, const U128& a) {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds the paired-add IR instruction for the requested element size:
// esize 32 selects Opcode::FPVectorPairedAdd32 and esize 64 selects
// Opcode::FPVectorPairedAdd64; any other esize falls through to UNREACHABLE().
// Both sides of the diff are shown; in the second line of each pair the new
// fpcr_controlled parameter is wrapped in Imm1() and passed as an extra operand.
U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 32:
|
case 32:
|
||||||
return Inst<U128>(Opcode::FPVectorPairedAdd32, a, b);
|
return Inst<U128>(Opcode::FPVectorPairedAdd32, a, b, Imm1(fpcr_controlled));
|
||||||
case 64:
|
case 64:
|
||||||
return Inst<U128>(Opcode::FPVectorPairedAdd64, a, b);
|
return Inst<U128>(Opcode::FPVectorPairedAdd64, a, b, Imm1(fpcr_controlled));
|
||||||
}
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds the "lower" paired-add IR instruction for the requested element size:
// esize 32 selects Opcode::FPVectorPairedAddLower32 and esize 64 selects
// Opcode::FPVectorPairedAddLower64; any other esize falls through to
// UNREACHABLE(). Both sides of the diff are shown; in the second line of each
// pair the new fpcr_controlled parameter is wrapped in Imm1() and passed as an
// extra operand.
U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 32:
|
case 32:
|
||||||
return Inst<U128>(Opcode::FPVectorPairedAddLower32, a, b);
|
return Inst<U128>(Opcode::FPVectorPairedAddLower32, a, b, Imm1(fpcr_controlled));
|
||||||
case 64:
|
case 64:
|
||||||
return Inst<U128>(Opcode::FPVectorPairedAddLower64, a, b);
|
return Inst<U128>(Opcode::FPVectorPairedAddLower64, a, b, Imm1(fpcr_controlled));
|
||||||
}
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
|
@ -358,8 +358,8 @@ public:
|
||||||
U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2);
|
U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2);
|
||||||
U128 FPVectorMulX(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorMulX(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorNeg(size_t esize, const U128& a);
|
U128 FPVectorNeg(size_t esize, const U128& a);
|
||||||
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
|
||||||
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
|
||||||
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
|
U128 FPVectorRecipEstimate(size_t esize, const U128& a);
|
||||||
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
|
U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
|
||||||
|
|
|
@ -609,10 +609,10 @@ OPCODE(FPVectorMulX64, U128, U128
|
||||||
OPCODE(FPVectorNeg16, U128, U128 )
|
OPCODE(FPVectorNeg16, U128, U128 )
|
||||||
OPCODE(FPVectorNeg32, U128, U128 )
|
OPCODE(FPVectorNeg32, U128, U128 )
|
||||||
OPCODE(FPVectorNeg64, U128, U128 )
|
OPCODE(FPVectorNeg64, U128, U128 )
|
||||||
OPCODE(FPVectorPairedAdd32, U128, U128, U128 )
|
OPCODE(FPVectorPairedAdd32, U128, U128, U128, U1 )
|
||||||
OPCODE(FPVectorPairedAdd64, U128, U128, U128 )
|
OPCODE(FPVectorPairedAdd64, U128, U128, U128, U1 )
|
||||||
OPCODE(FPVectorPairedAddLower32, U128, U128, U128 )
|
OPCODE(FPVectorPairedAddLower32, U128, U128, U128, U1 )
|
||||||
OPCODE(FPVectorPairedAddLower64, U128, U128, U128 )
|
OPCODE(FPVectorPairedAddLower64, U128, U128, U128, U1 )
|
||||||
OPCODE(FPVectorRecipEstimate16, U128, U128 )
|
OPCODE(FPVectorRecipEstimate16, U128, U128 )
|
||||||
OPCODE(FPVectorRecipEstimate32, U128, U128 )
|
OPCODE(FPVectorRecipEstimate32, U128, U128 )
|
||||||
OPCODE(FPVectorRecipEstimate64, U128, U128 )
|
OPCODE(FPVectorRecipEstimate64, U128, U128 )
|
||||||
|
|
|
@ -110,6 +110,8 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) {
|
||||||
"arm_UDF",
|
"arm_UDF",
|
||||||
// FPSCR is inaccurate
|
// FPSCR is inaccurate
|
||||||
"vfp_VMRS",
|
"vfp_VMRS",
|
||||||
|
// Unimplemented in Unicorn
|
||||||
|
"asimd_VPADD_float",
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const auto& [fn, bitstring] : list) {
|
for (const auto& [fn, bitstring] : list) {
|
||||||
|
|
Loading…
Reference in a new issue