VFP: Implement VNMUL, VDIV
This commit is contained in:
parent
12e7f2c359
commit
3f1345a1a5
8 changed files with 101 additions and 2 deletions
|
@ -1148,6 +1148,22 @@ void EmitX64::EmitFPAbs64(IR::Block&, IR::Inst* inst) {
|
||||||
code->PAND(result, routines->MFloatNonSignMask64());
|
code->PAND(result, routines->MFloatNonSignMask64());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the x64 code for the FPNeg32 IR instruction.
// The single argument is placed in an XMM register that also becomes the
// definition of `inst`, and is then XORed with the constant supplied by
// routines->MFloatNegativeZero32() (presumably a sign-bit mask — confirm).
void EmitX64::EmitFPNeg32(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);

    X64Reg xmm = reg_alloc.UseDefRegister(operand, inst, any_xmm);

    code->PXOR(xmm, routines->MFloatNegativeZero32());
}
|
||||||
|
|
||||||
|
// Emits the x64 code for the FPNeg64 IR instruction.
// The single argument is placed in an XMM register that also becomes the
// definition of `inst`, and is then XORed with the constant supplied by
// routines->MFloatNegativeZero64() (presumably a sign-bit mask — confirm).
void EmitX64::EmitFPNeg64(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);

    X64Reg xmm = reg_alloc.UseDefRegister(operand, inst, any_xmm);

    code->PXOR(xmm, routines->MFloatNegativeZero64());
}
|
||||||
|
|
||||||
// Emits the x64 code for the FPAdd32 IR instruction by handing the ADDSS
// emitter member function to the shared FPOp32 helper.
void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
    FPOp32(code,
           routines,
           reg_alloc,
           block,
           inst,
           &XEmitter::ADDSS);
}
|
||||||
|
@ -1156,6 +1172,14 @@ void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
|
||||||
FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::ADDSD);
|
FPOp64(code, routines, reg_alloc, block, inst, &XEmitter::ADDSD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the x64 code for the FPDiv32 IR instruction by handing the DIVSS
// emitter member function to the shared FPOp32 helper.
void EmitX64::EmitFPDiv32(IR::Block& block, IR::Inst* inst) {
    FPOp32(code,
           routines,
           reg_alloc,
           block,
           inst,
           &XEmitter::DIVSS);
}
|
||||||
|
|
||||||
|
// Emits the x64 code for the FPDiv64 IR instruction by handing the DIVSD
// emitter member function to the shared FPOp64 helper.
void EmitX64::EmitFPDiv64(IR::Block& block, IR::Inst* inst) {
    FPOp64(code,
           routines,
           reg_alloc,
           block,
           inst,
           &XEmitter::DIVSD);
}
|
||||||
|
|
||||||
// Emits the x64 code for the FPMul32 IR instruction by handing the MULSS
// emitter member function to the shared FPOp32 helper.
void EmitX64::EmitFPMul32(IR::Block& block, IR::Inst* inst) {
    FPOp32(code,
           routines,
           reg_alloc,
           block,
           inst,
           &XEmitter::MULSS);
}
|
||||||
|
|
|
@ -68,11 +68,11 @@ boost::optional<const VFP2Matcher<V>&> DecodeVFP2(u32 instruction) {
|
||||||
// VMLS
|
// VMLS
|
||||||
// VNMLA
|
// VNMLA
|
||||||
// VNMLS
|
// VNMLS
|
||||||
// VNMUL
|
|
||||||
INST(&V::vfp2_VMUL, "VMUL", "cccc11100D10nnnndddd101zN0M0mmmm"),
|
INST(&V::vfp2_VMUL, "VMUL", "cccc11100D10nnnndddd101zN0M0mmmm"),
|
||||||
|
INST(&V::vfp2_VNMUL, "VNMUL", "cccc11100D10nnnndddd101zN1M0mmmm"),
|
||||||
INST(&V::vfp2_VADD, "VADD", "cccc11100D11nnnndddd101zN0M0mmmm"),
|
INST(&V::vfp2_VADD, "VADD", "cccc11100D11nnnndddd101zN0M0mmmm"),
|
||||||
INST(&V::vfp2_VSUB, "VSUB", "cccc11100D11nnnndddd101zN1M0mmmm"),
|
INST(&V::vfp2_VSUB, "VSUB", "cccc11100D11nnnndddd101zN1M0mmmm"),
|
||||||
// VDIV
|
INST(&V::vfp2_VDIV, "VDIV", "cccc11101D00nnnndddd101zN0M0mmmm"),
|
||||||
|
|
||||||
// Floating-point other instructions
|
// Floating-point other instructions
|
||||||
// VMOV_imm
|
// VMOV_imm
|
||||||
|
|
|
@ -572,6 +572,14 @@ public:
|
||||||
return Common::StringFromFormat("vsub%s.%s %s, %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str());
|
return Common::StringFromFormat("vsub%s.%s %s, %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vn, N).c_str(), FPRegStr(sz, Vm, M).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Formats a VNMUL instruction as "vnmul<cond>.<f32|f64> <d>, <n>, <m>".
// `sz` selects the "f64" suffix when set and "f32" otherwise; the three
// register operands are rendered by FPRegStr from (sz, Vx, X).
std::string vfp2_VNMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
    const char* const precision = sz ? "f64" : "f32";
    const auto dest = FPRegStr(sz, Vd, D);
    const auto lhs = FPRegStr(sz, Vn, N);
    const auto rhs = FPRegStr(sz, Vm, M);
    return Common::StringFromFormat("vnmul%s.%s %s, %s, %s", CondToString(cond), precision, dest.c_str(), lhs.c_str(), rhs.c_str());
}
|
||||||
|
|
||||||
|
// Formats a VDIV instruction as "vdiv<cond>.<f32|f64> <d>, <n>, <m>".
// `sz` selects the "f64" suffix when set and "f32" otherwise; the three
// register operands are rendered by FPRegStr from (sz, Vx, X).
std::string vfp2_VDIV(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
    const char* const precision = sz ? "f64" : "f32";
    const auto dest = FPRegStr(sz, Vd, D);
    const auto lhs = FPRegStr(sz, Vn, N);
    const auto rhs = FPRegStr(sz, Vm, M);
    return Common::StringFromFormat("vdiv%s.%s %s, %s, %s", CondToString(cond), precision, dest.c_str(), lhs.c_str(), rhs.c_str());
}
|
||||||
|
|
||||||
// Formats a VABS instruction as "vabs<cond>.<f32|f64> <d>, <m>".
// `sz` selects the "f64" suffix when set and "f32" otherwise; the two
// register operands are rendered by FPRegStr from (sz, Vx, X).
//
// Fix: the mnemonic was previously emitted as "vadd", which made VABS
// indistinguishable from VADD in disassembly output.
std::string vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
    return Common::StringFromFormat("vabs%s.%s %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str());
}
|
||||||
|
|
|
@ -292,6 +292,16 @@ IR::Value IREmitter::FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_
|
||||||
return Inst(IR::Opcode::FPAdd64, {a, b});
|
return Inst(IR::Opcode::FPAdd64, {a, b});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends an FPDiv32 IR instruction with operands (a, b) and returns its value.
// Asserts that `fpscr_controlled` is true before emitting.
IR::Value IREmitter::FPDiv32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
    ASSERT(fpscr_controlled);

    const IR::Opcode opcode = IR::Opcode::FPDiv32;
    return Inst(opcode, {a, b});
}
|
||||||
|
|
||||||
|
// Appends an FPDiv64 IR instruction with operands (a, b) and returns its value.
// Asserts that `fpscr_controlled` is true before emitting.
IR::Value IREmitter::FPDiv64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
    ASSERT(fpscr_controlled);

    const IR::Opcode opcode = IR::Opcode::FPDiv64;
    return Inst(opcode, {a, b});
}
|
||||||
|
|
||||||
IR::Value IREmitter::FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
|
IR::Value IREmitter::FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
|
||||||
ASSERT(fpscr_controlled);
|
ASSERT(fpscr_controlled);
|
||||||
return Inst(IR::Opcode::FPMul32, {a, b});
|
return Inst(IR::Opcode::FPMul32, {a, b});
|
||||||
|
@ -302,6 +312,15 @@ IR::Value IREmitter::FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_
|
||||||
return Inst(IR::Opcode::FPMul64, {a, b});
|
return Inst(IR::Opcode::FPMul64, {a, b});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends an FPNeg32 IR instruction taking `a` as its only operand and
// returns the resulting value.
IR::Value IREmitter::FPNeg32(const IR::Value& a) {
    const IR::Opcode opcode = IR::Opcode::FPNeg32;
    return Inst(opcode, {a});
}
|
||||||
|
|
||||||
|
// Appends an FPNeg64 IR instruction taking `a` as its only operand and
// returns the resulting value.
IR::Value IREmitter::FPNeg64(const IR::Value& a) {
    const IR::Opcode opcode = IR::Opcode::FPNeg64;
    return Inst(opcode, {a});
}
|
||||||
|
|
||||||
|
|
||||||
IR::Value IREmitter::FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
|
IR::Value IREmitter::FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
|
||||||
ASSERT(fpscr_controlled);
|
ASSERT(fpscr_controlled);
|
||||||
return Inst(IR::Opcode::FPSub32, {a, b});
|
return Inst(IR::Opcode::FPSub32, {a, b});
|
||||||
|
|
|
@ -96,8 +96,12 @@ public:
|
||||||
IR::Value FPAbs64(const IR::Value& a);
|
IR::Value FPAbs64(const IR::Value& a);
|
||||||
IR::Value FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
IR::Value FPAdd32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
IR::Value FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
IR::Value FPAdd64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
|
IR::Value FPDiv32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
|
IR::Value FPDiv64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
IR::Value FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
IR::Value FPMul32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
IR::Value FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
IR::Value FPMul64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
|
IR::Value FPNeg32(const IR::Value& a);
|
||||||
|
IR::Value FPNeg64(const IR::Value& a);
|
||||||
IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
|
||||||
|
|
||||||
|
|
|
@ -64,8 +64,12 @@ OPCODE(FPAbs32, T::F32, T::F32
|
||||||
OPCODE(FPAbs64, T::F64, T::F64 )
|
OPCODE(FPAbs64, T::F64, T::F64 )
|
||||||
OPCODE(FPAdd32, T::F32, T::F32, T::F32 )
|
OPCODE(FPAdd32, T::F32, T::F32, T::F32 )
|
||||||
OPCODE(FPAdd64, T::F64, T::F64, T::F64 )
|
OPCODE(FPAdd64, T::F64, T::F64, T::F64 )
|
||||||
|
OPCODE(FPDiv32, T::F32, T::F32, T::F32 )
|
||||||
|
OPCODE(FPDiv64, T::F64, T::F64, T::F64 )
|
||||||
OPCODE(FPMul32, T::F32, T::F32, T::F32 )
|
OPCODE(FPMul32, T::F32, T::F32, T::F32 )
|
||||||
OPCODE(FPMul64, T::F64, T::F64, T::F64 )
|
OPCODE(FPMul64, T::F64, T::F64, T::F64 )
|
||||||
|
OPCODE(FPNeg32, T::F32, T::F32 )
|
||||||
|
OPCODE(FPNeg64, T::F64, T::F64 )
|
||||||
OPCODE(FPSub32, T::F32, T::F32, T::F32 )
|
OPCODE(FPSub32, T::F32, T::F32, T::F32 )
|
||||||
OPCODE(FPSub64, T::F64, T::F64, T::F64 )
|
OPCODE(FPSub64, T::F64, T::F64, T::F64 )
|
||||||
|
|
||||||
|
|
|
@ -321,7 +321,9 @@ struct ArmTranslatorVisitor final {
|
||||||
// Floating-point three-register data processing instructions
|
// Floating-point three-register data processing instructions
|
||||||
bool vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
bool vfp2_VADD(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
||||||
bool vfp2_VSUB(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
bool vfp2_VSUB(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
||||||
|
bool vfp2_VNMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
||||||
bool vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
bool vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
||||||
|
bool vfp2_VDIV(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm);
|
||||||
|
|
||||||
// Floating-point misc instructions
|
// Floating-point misc instructions
|
||||||
bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
|
bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
|
||||||
|
|
|
@ -74,6 +74,44 @@ bool ArmTranslatorVisitor::vfp2_VMUL(Cond cond, bool D, size_t Vn, size_t Vd, bo
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Translates VNMUL.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m> into IR.
// When the condition passes, register d receives FPNeg(FPMul(n, m)), using the
// 64-bit opcodes if `sz` is set and the 32-bit ones otherwise. If FPSCR Len or
// Stride is not 1, the instruction is handed to InterpretThisInstruction().
bool ArmTranslatorVisitor::vfp2_VNMUL(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
    const bool unit_len_and_stride = ir.current_location.FPSCR_Len() == 1 && ir.current_location.FPSCR_Stride() == 1;
    if (!unit_len_and_stride) {
        return InterpretThisInstruction(); // TODO: Vectorised floating point instructions
    }

    ExtReg dest = ToExtReg(sz, Vd, D);
    ExtReg lhs_reg = ToExtReg(sz, Vn, N);
    ExtReg rhs_reg = ToExtReg(sz, Vm, M);

    // VNMUL.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m>
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto lhs = ir.GetExtendedRegister(lhs_reg);
    auto rhs = ir.GetExtendedRegister(rhs_reg);
    auto negated_product = sz ? ir.FPNeg64(ir.FPMul64(lhs, rhs, true))
                              : ir.FPNeg32(ir.FPMul32(lhs, rhs, true));
    ir.SetExtendedRegister(dest, negated_product);
    return true;
}
|
||||||
|
|
||||||
|
// Translates VDIV.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m> into IR.
// When the condition passes, register d receives FPDiv(n, m), using the 64-bit
// opcode if `sz` is set and the 32-bit one otherwise. If FPSCR Len or Stride
// is not 1, the instruction is handed to InterpretThisInstruction().
bool ArmTranslatorVisitor::vfp2_VDIV(Cond cond, bool D, size_t Vn, size_t Vd, bool sz, bool N, bool M, size_t Vm) {
    const bool unit_len_and_stride = ir.current_location.FPSCR_Len() == 1 && ir.current_location.FPSCR_Stride() == 1;
    if (!unit_len_and_stride) {
        return InterpretThisInstruction(); // TODO: Vectorised floating point instructions
    }

    ExtReg dest = ToExtReg(sz, Vd, D);
    ExtReg dividend_reg = ToExtReg(sz, Vn, N);
    ExtReg divisor_reg = ToExtReg(sz, Vm, M);

    // VDIV.{F32,F64} <{S,D}d>, <{S,D}n>, <{S,D}m>
    if (!ConditionPassed(cond)) {
        return true;
    }

    auto dividend = ir.GetExtendedRegister(dividend_reg);
    auto divisor = ir.GetExtendedRegister(divisor_reg);
    auto quotient = sz ? ir.FPDiv64(dividend, divisor, true)
                       : ir.FPDiv32(dividend, divisor, true);
    ir.SetExtendedRegister(dest, quotient);
    return true;
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
|
bool ArmTranslatorVisitor::vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
|
||||||
if (ir.current_location.FPSCR_Len() != 1 || ir.current_location.FPSCR_Stride() != 1)
|
if (ir.current_location.FPSCR_Len() != 1 || ir.current_location.FPSCR_Stride() != 1)
|
||||||
return InterpretThisInstruction(); // TODO: Vectorised floating point instructions
|
return InterpretThisInstruction(); // TODO: Vectorised floating point instructions
|
||||||
|
|
Loading…
Reference in a new issue