Implement VCVT

parent 445aad0639
commit b5a86889cd

8 changed files with 447 additions and 3 deletions
@@ -1211,6 +1211,13 @@ static void DefaultNaN64(BlockOfCode* code, X64Reg xmm_value) {
     code->SetJumpTarget(fixup);
 }
 
+static void ZeroIfNaN64(BlockOfCode* code, X64Reg xmm_value) {
+    code->UCOMISD(xmm_value, R(xmm_value));
+    auto fixup = code->J_CC(CC_NP);
+    code->MOVAPS(xmm_value, code->MFloatPositiveZero64());
+    code->SetJumpTarget(fixup);
+}
+
 static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) {
     IR::Value a = inst->GetArg(0);
     IR::Value b = inst->GetArg(1);
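In scalar terms, the ZeroIfNaN64 helper added above implements the following semantics (a minimal reference sketch, not code from the repository):

#include <cmath>

// UCOMISD sets PF when the comparison is unordered, i.e. when the operand
// is NaN; J_CC(CC_NP) therefore skips the MOVAPS for ordered values, so
// only NaNs are rewritten to +0.0 before a later integer conversion.
double ZeroIfNaN64(double value) {
    return std::isnan(value) ? +0.0 : value;
}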
@@ -1389,6 +1396,283 @@ void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
     FPThreeOp64(code, reg_alloc, block, inst, &XEmitter::SUBSD);
 }
 
+void EmitX64::EmitFPSingleToDouble(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+
+    X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    if (block.location.FPSCR_FTZ()) {
+        DenormalsAreZero32(code, result, gpr_scratch);
+    }
+    code->CVTSS2SD(result, R(result));
+    if (block.location.FPSCR_FTZ()) {
+        FlushToZero64(code, result, gpr_scratch);
+    }
+    if (block.location.FPSCR_DN()) {
+        DefaultNaN64(code, result);
+    }
+}
+
+void EmitX64::EmitFPDoubleToSingle(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+
+    X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    if (block.location.FPSCR_FTZ()) {
+        DenormalsAreZero64(code, result, gpr_scratch);
+    }
+    code->CVTSD2SS(result, R(result));
+    if (block.location.FPSCR_FTZ()) {
+        FlushToZero32(code, result, gpr_scratch);
+    }
+    if (block.location.FPSCR_DN()) {
+        DefaultNaN32(code, result);
+    }
+}
+
+void EmitX64::EmitFPSingleToS32(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_towards_zero = inst->GetArg(1).GetU1();
+
+    X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
+    // Conversion to double is lossless, and allows for clamping.
+
+    if (block.location.FPSCR_FTZ()) {
+        DenormalsAreZero32(code, from, gpr_scratch);
+    }
+    code->CVTSS2SD(from, R(from));
+    // First time is to set flags
+    if (round_towards_zero) {
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    } else {
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    }
+    // Clamp to output range
+    ZeroIfNaN64(code, from);
+    code->MINSD(from, code->MFloatMaxS32());
+    code->MAXSD(from, code->MFloatMinS32());
+    // Second time is for real
+    if (round_towards_zero) {
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    } else {
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    }
+    code->MOVD_xmm(to, R(gpr_scratch));
+}
+
+void EmitX64::EmitFPSingleToU32(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_towards_zero = inst->GetArg(1).GetU1();
+
+    X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
+    // Conversion to double is lossless, and allows for accurate clamping.
+    //
+    // Since SSE2 doesn't provide an unsigned conversion, we shift the range as appropriate.
+    //
+    // FIXME: Inexact exception not correctly signalled with the below code
+
+    if (block.location.FPSCR_RMode() != Arm::FPRoundingMode::RoundTowardsZero && !round_towards_zero) {
+        if (block.location.FPSCR_FTZ()) {
+            DenormalsAreZero32(code, from, gpr_scratch);
+        }
+        code->CVTSS2SD(from, R(from));
+        ZeroIfNaN64(code, from);
+        // Bring into SSE range
+        code->ADDSD(from, code->MFloatMinS32());
+        // First time is to set flags
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Clamp to output range
+        code->MINSD(from, code->MFloatMaxS32());
+        code->MAXSD(from, code->MFloatMinS32());
+        // Actually convert
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Bring back into original range
+        code->ADD(32, R(gpr_scratch), Imm32(2147483648u));
+        code->MOVQ_xmm(to, R(gpr_scratch));
+    } else {
+        X64Reg xmm_mask = reg_alloc.ScratchRegister(any_xmm);
+        X64Reg gpr_mask = reg_alloc.ScratchRegister(any_gpr);
+
+        if (block.location.FPSCR_FTZ()) {
+            DenormalsAreZero32(code, from, gpr_scratch);
+        }
+        code->CVTSS2SD(from, R(from));
+        ZeroIfNaN64(code, from);
+        // Generate masks if out-of-signed-range
+        code->MOVAPS(xmm_mask, code->MFloatMaxS32());
+        code->CMPLTSD(xmm_mask, R(from));
+        code->MOVQ_xmm(R(gpr_mask), xmm_mask);
+        code->PAND(xmm_mask, code->MFloatMinS32());
+        code->AND(32, R(gpr_mask), Imm32(2147483648u));
+        // Bring into range if necessary
+        code->ADDSD(from, R(xmm_mask));
+        // First time is to set flags
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Clamp to output range
+        code->MINSD(from, code->MFloatMaxS32());
+        code->MAXSD(from, code->MFloatMinU32());
+        // Actually convert
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Bring back into original range if necessary
+        code->ADD(32, R(gpr_scratch), R(gpr_mask));
+        code->MOVQ_xmm(to, R(gpr_scratch));
+    }
+}
+
+void EmitX64::EmitFPDoubleToS32(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_towards_zero = inst->GetArg(1).GetU1();
+
+    X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
+
+    if (block.location.FPSCR_FTZ()) {
+        DenormalsAreZero64(code, from, gpr_scratch);
+    }
+    // First time is to set flags
+    if (round_towards_zero) {
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    } else {
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    }
+    // Clamp to output range
+    ZeroIfNaN64(code, from);
+    code->MINSD(from, code->MFloatMaxS32());
+    code->MAXSD(from, code->MFloatMinS32());
+    // Second time is for real
+    if (round_towards_zero) {
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    } else {
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+    }
+    code->MOVD_xmm(to, R(gpr_scratch));
+}
+
+void EmitX64::EmitFPDoubleToU32(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_towards_zero = inst->GetArg(1).GetU1();
+
+    X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+    X64Reg xmm_scratch = reg_alloc.ScratchRegister(any_xmm);
+
+    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
+    // TODO: Use VCVTPD2UDQ when AVX512VL is available.
+    // FIXME: Inexact exception not correctly signalled with the below code
+
+    if (block.location.FPSCR_RMode() != Arm::FPRoundingMode::RoundTowardsZero && !round_towards_zero) {
+        if (block.location.FPSCR_FTZ()) {
+            DenormalsAreZero64(code, from, gpr_scratch);
+        }
+        ZeroIfNaN64(code, from);
+        // Bring into SSE range
+        code->ADDSD(from, code->MFloatMinS32());
+        // First time is to set flags
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Clamp to output range
+        code->MINSD(from, code->MFloatMaxS32());
+        code->MAXSD(from, code->MFloatMinS32());
+        // Actually convert
+        code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Bring back into original range
+        code->ADD(32, R(gpr_scratch), Imm32(2147483648u));
+        code->MOVQ_xmm(to, R(gpr_scratch));
+    } else {
+        X64Reg xmm_mask = reg_alloc.ScratchRegister(any_xmm);
+        X64Reg gpr_mask = reg_alloc.ScratchRegister(any_gpr);
+
+        if (block.location.FPSCR_FTZ()) {
+            DenormalsAreZero64(code, from, gpr_scratch);
+        }
+        ZeroIfNaN64(code, from);
+        // Generate masks if out-of-signed-range
+        code->MOVAPS(xmm_mask, code->MFloatMaxS32());
+        code->CMPLTSD(xmm_mask, R(from));
+        code->MOVQ_xmm(R(gpr_mask), xmm_mask);
+        code->PAND(xmm_mask, code->MFloatMinS32());
+        code->AND(32, R(gpr_mask), Imm32(2147483648u));
+        // Bring into range if necessary
+        code->ADDSD(from, R(xmm_mask));
+        // First time is to set flags
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Clamp to output range
+        code->MINSD(from, code->MFloatMaxS32());
+        code->MAXSD(from, code->MFloatMinU32());
+        // Actually convert
+        code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr
+        // Bring back into original range if necessary
+        code->ADD(32, R(gpr_scratch), R(gpr_mask));
+        code->MOVQ_xmm(to, R(gpr_scratch));
+    }
+}
+
+void EmitX64::EmitFPS32ToSingle(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_to_nearest = inst->GetArg(1).GetU1();
+    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
+
+    X64Reg from = reg_alloc.UseRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    code->MOVD_xmm(R(gpr_scratch), from);
+    code->CVTSI2SS(32, to, R(gpr_scratch));
+}
+
+void EmitX64::EmitFPU32ToSingle(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_to_nearest = inst->GetArg(1).GetU1();
+    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
+
+    X64Reg from = reg_alloc.UseRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    code->MOVD_xmm(R(gpr_scratch), from);
+    code->CVTSI2SS(64, to, R(gpr_scratch));
+}
+
+void EmitX64::EmitFPS32ToDouble(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_to_nearest = inst->GetArg(1).GetU1();
+    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
+
+    X64Reg from = reg_alloc.UseRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    code->MOVD_xmm(R(gpr_scratch), from);
+    code->CVTSI2SD(32, to, R(gpr_scratch));
+}
+
+void EmitX64::EmitFPU32ToDouble(IR::Block& block, IR::Inst* inst) {
+    IR::Value a = inst->GetArg(0);
+    bool round_to_nearest = inst->GetArg(1).GetU1();
+    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
+
+    X64Reg from = reg_alloc.UseRegister(a, any_xmm);
+    X64Reg to = reg_alloc.DefRegister(inst, any_xmm);
+    X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr);
+
+    code->MOVD_xmm(R(gpr_scratch), from);
+    code->CVTSI2SD(64, to, R(gpr_scratch));
+}
+
+
 void EmitX64::EmitClearExclusive(IR::Block&, IR::Inst*) {
     code->MOV(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(0));
 }
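In scalar terms, the saturating conversions emitted above behave like the sketch below: NaN becomes zero, out-of-range inputs clamp to the type bounds, and the unsigned case is built from the signed one by shifting the range by 2^31. Helper names are hypothetical; this is a reference model of the technique, not code from the commit.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Reference model of the signed path: convert via double (lossless from
// f32), clamp to [INT32_MIN, INT32_MAX], and map NaN to 0. The cast here
// truncates, matching the CVTTSD2SI (round-towards-zero) variant; the
// non-truncating variant uses the current rounding mode instead.
int32_t SaturatedS32(double x) {
    if (std::isnan(x))
        return 0;
    x = std::min(x, 2147483647.0);   // MFloatMaxS32
    x = std::max(x, -2147483648.0);  // MFloatMinS32
    return static_cast<int32_t>(x);
}

// Reference model of the unsigned path: SSE2 has no f64 -> u32 instruction,
// so the emitted code subtracts 2^31 to land in signed range, converts,
// then adds 2^31 back as an integer. Saturation falls out of the clamp:
// x < 0 yields 0 and x > 2^32 - 1 yields UINT32_MAX.
uint32_t SaturatedU32(double x) {
    if (std::isnan(x))
        return 0;
    double shifted = x - 2147483648.0;
    shifted = std::min(shifted, 2147483647.0);
    shifted = std::max(shifted, -2147483648.0);
    return static_cast<uint32_t>(static_cast<int32_t>(shifted)) + 2147483648u;
}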
@@ -89,10 +89,12 @@ boost::optional<const VFP2Matcher<V>&> DecodeVFP2(u32 instruction) {
     INST(&V::vfp2_VABS,          "VABS",              "cccc11101D110000dddd101z11M0mmmm"),
     INST(&V::vfp2_VNEG,          "VNEG",              "cccc11101D110001dddd101z01M0mmmm"),
     INST(&V::vfp2_VSQRT,         "VSQRT",             "cccc11101D110001dddd101z11M0mmmm"),
+    INST(&V::vfp2_VCVT_f_to_f,   "VCVT (f32<->f64)",  "cccc11101D110111dddd101z11M0mmmm"),
+    INST(&V::vfp2_VCVT_to_float, "VCVT (to float)",   "cccc11101D111000dddd101zs1M0mmmm"),
+    INST(&V::vfp2_VCVT_to_u32,   "VCVT (to u32)",     "cccc11101D111100dddd101zr1M0mmmm"),
+    INST(&V::vfp2_VCVT_to_s32,   "VCVT (to s32)",     "cccc11101D111101dddd101zr1M0mmmm"),
     // VCMP
     // VCMPE
-    // VCVT
-    // VCVTR
 
     // Extension register load-store instructions
     INST(&V::vfp2_VPUSH,         "VPUSH",             "cccc11010D101101dddd101zvvvvvvvv"),
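Each INST bit-string is a 32-bit template: literal 0/1 bits must match, and letter runs (c, D, d, z, s, r, M, m) are operand fields extracted at decode time. A minimal sketch of how such a string could be compiled into a mask/expected pair follows; this is illustrative only, and dynarmic's actual decoder templates differ.

#include <cstdint>
#include <string>

// Build (mask, expect) from a 32-character pattern such as
// "cccc11101D111100dddd101zr1M0mmmm". An instruction word w matches
// when (w & mask) == expect; letter positions are don't-cares whose
// bits are later read back out as operand fields.
void CompilePattern(const std::string& pattern, uint32_t& mask, uint32_t& expect) {
    mask = expect = 0;
    for (char c : pattern) {
        mask <<= 1;
        expect <<= 1;
        if (c == '0' || c == '1') {
            mask |= 1;
            expect |= (c == '1') ? 1 : 0;
        }
    }
}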
@@ -871,6 +871,22 @@ public:
         return Common::StringFromFormat("vsqrt%s.%s %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str());
     }
 
+    std::string vfp2_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
+        return Common::StringFromFormat("vcvt%s.%s.%s %s, %s", CondToString(cond), !sz ? "f64" : "f32", sz ? "f64" : "f32", FPRegStr(!sz, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str());
+    }
+
+    std::string vfp2_VCVT_to_float(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm) {
+        return Common::StringFromFormat("vcvt%s.%s.%s %s, %s", CondToString(cond), sz ? "f64" : "f32", is_signed ? "s32" : "u32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(false, Vm, M).c_str());
+    }
+
+    std::string vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
+        return Common::StringFromFormat("vcvt%s%s.u32.%s %s, %s", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str());
+    }
+
+    std::string vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
+        return Common::StringFromFormat("vcvt%s%s.s32.%s %s, %s", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str());
+    }
+
     std::string vfp2_VPOP(Cond cond, bool D, size_t Vd, bool sz, Imm8 imm8) {
         return Common::StringFromFormat("vpop%s %s(+%u)", CondToString(cond), FPRegStr(sz, Vd, D).c_str(), imm8 >> (sz ? 1 : 0));
     }
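For concreteness, the format strings above produce output along these lines; the samples are hand-derived and assume CondToString yields an empty string for the unconditional case.

// Hand-derived samples from the format strings above (unconditional case):
//   vfp2_VCVT_f_to_f,   sz = 0:                  "vcvt.f64.f32 d2, s3" (destination width flips)
//   vfp2_VCVT_to_float, sz = 1, is_signed = 1:   "vcvt.f64.s32 d5, s1"
//   vfp2_VCVT_to_u32,   round_towards_zero = 0:  "vcvtr.u32.f32 s0, s1"
//   vfp2_VCVT_to_s32,   round_towards_zero = 1:  "vcvt.s32.f64 s4, d7"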
@@ -406,6 +406,56 @@ IR::Value IREmitter::FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled) {
     return Inst(IR::Opcode::FPSub64, {a, b});
 }
 
+IR::Value IREmitter::FPDoubleToSingle(const IR::Value& a, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPDoubleToSingle, {a});
+}
+
+IR::Value IREmitter::FPSingleToDouble(const IR::Value& a, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPSingleToDouble, {a});
+}
+
+IR::Value IREmitter::FPSingleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPSingleToS32, {a, Imm1(round_towards_zero)});
+}
+
+IR::Value IREmitter::FPSingleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPSingleToU32, {a, Imm1(round_towards_zero)});
+}
+
+IR::Value IREmitter::FPDoubleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPDoubleToS32, {a, Imm1(round_towards_zero)});
+}
+
+IR::Value IREmitter::FPDoubleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPDoubleToU32, {a, Imm1(round_towards_zero)});
+}
+
+IR::Value IREmitter::FPS32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPS32ToSingle, {a, Imm1(round_to_nearest)});
+}
+
+IR::Value IREmitter::FPU32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPU32ToSingle, {a, Imm1(round_to_nearest)});
+}
+
+IR::Value IREmitter::FPS32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPS32ToDouble, {a, Imm1(round_to_nearest)});
+}
+
+IR::Value IREmitter::FPU32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) {
+    ASSERT(fpscr_controlled);
+    return Inst(IR::Opcode::FPU32ToDouble, {a, Imm1(round_to_nearest)});
+}
+
 void IREmitter::ClearExlcusive() {
     Inst(IR::Opcode::ClearExclusive, {});
 }
@@ -140,6 +140,16 @@ public:
     IR::Value FPSqrt64(const IR::Value& a);
     IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
     IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled);
+    IR::Value FPDoubleToSingle(const IR::Value& a, bool fpscr_controlled);
+    IR::Value FPSingleToDouble(const IR::Value& a, bool fpscr_controlled);
+    IR::Value FPSingleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled);
+    IR::Value FPSingleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled);
+    IR::Value FPDoubleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled);
+    IR::Value FPDoubleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled);
+    IR::Value FPS32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled);
+    IR::Value FPU32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled);
+    IR::Value FPS32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled);
+    IR::Value FPU32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled);
 
     void ClearExlcusive();
     void SetExclusive(const IR::Value& vaddr, size_t byte_size);
@@ -74,7 +74,7 @@ OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
 OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
 OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
 
-// Floating-point
+// Floating-point operations
 OPCODE(TransferToFP32,        T::F32, T::U32         )
 OPCODE(TransferToFP64,        T::F64, T::U64         )
 OPCODE(TransferFromFP32,      T::U32, T::F32         )
@@ -94,6 +94,18 @@ OPCODE(FPSqrt64, T::F64, T::F64 )
 OPCODE(FPSub32,               T::F32, T::F32, T::F32 )
 OPCODE(FPSub64,               T::F64, T::F64, T::F64 )
 
+// Floating-point conversions
+OPCODE(FPSingleToDouble,      T::F64, T::F32         )
+OPCODE(FPDoubleToSingle,      T::F32, T::F64         )
+OPCODE(FPSingleToU32,         T::F32, T::F32, T::U1  )
+OPCODE(FPSingleToS32,         T::F32, T::F32, T::U1  )
+OPCODE(FPDoubleToU32,         T::F32, T::F64, T::U1  )
+OPCODE(FPDoubleToS32,         T::F32, T::F64, T::U1  )
+OPCODE(FPU32ToSingle,         T::F32, T::F32, T::U1  )
+OPCODE(FPS32ToSingle,         T::F32, T::F32, T::U1  )
+OPCODE(FPU32ToDouble,         T::F64, T::F32, T::U1  )
+OPCODE(FPS32ToDouble,         T::F64, T::F32, T::U1  )
+
 // Memory access
 OPCODE(ClearExclusive,        T::Void,               )
 OPCODE(SetExclusive,          T::Void, T::U32, T::U8 )
@@ -357,6 +357,10 @@ struct ArmTranslatorVisitor final {
     bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
+    bool vfp2_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
+    bool vfp2_VCVT_to_float(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm);
+    bool vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
+    bool vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
 
     // Floating-point load-store instructions
     bool vfp2_VLDR(Cond cond, bool U, bool D, Reg n, size_t Vd, bool sz, Imm8 imm8);
@@ -360,6 +360,72 @@ bool ArmTranslatorVisitor::vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
     return true;
 }
 
+bool ArmTranslatorVisitor::vfp2_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(!sz, Vd, D); // Destination is of opposite size to source
+    ExtReg m = ToExtReg(sz, Vm, M);
+    // VCVT.F64.F32 <Dd>, <Sm>
+    // VCVT.F32.F64 <Sd>, <Dm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? ir.FPDoubleToSingle(a, true)
+                      : ir.FPSingleToDouble(a, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+bool ArmTranslatorVisitor::vfp2_VCVT_to_float(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(sz, Vd, D);
+    ExtReg m = ToExtReg(false, Vm, M);
+    bool round_to_nearest = false;
+    // VCVT.F32.{S32,U32} <Sd>, <Sm>
+    // VCVT.F64.{S32,U32} <Dd>, <Sm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? is_signed
+                        ? ir.FPS32ToDouble(a, round_to_nearest, true)
+                        : ir.FPU32ToDouble(a, round_to_nearest, true)
+                      : is_signed
+                        ? ir.FPS32ToSingle(a, round_to_nearest, true)
+                        : ir.FPU32ToSingle(a, round_to_nearest, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(false, Vd, D);
+    ExtReg m = ToExtReg(sz, Vm, M);
+    // VCVT{,R}.U32.F32 <Sd>, <Sm>
+    // VCVT{,R}.U32.F64 <Sd>, <Dm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? ir.FPDoubleToU32(a, round_towards_zero, true)
+                      : ir.FPSingleToU32(a, round_towards_zero, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(false, Vd, D);
+    ExtReg m = ToExtReg(sz, Vm, M);
+    // VCVT{,R}.S32.F32 <Sd>, <Sm>
+    // VCVT{,R}.S32.F64 <Sd>, <Dm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? ir.FPDoubleToS32(a, round_towards_zero, true)
+                      : ir.FPSingleToS32(a, round_towards_zero, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+
 bool ArmTranslatorVisitor::vfp2_VPOP(Cond cond, bool D, size_t Vd, bool sz, Imm8 imm8) {
     const ExtReg d = ToExtReg(sz, Vd, D);
     const size_t regs = sz ? imm8 >> 1 : imm8;
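As a worked example of how the decode table and translator line up, filling in the vfp2_VCVT_f_to_f pattern "cccc11101D110111dddd101z11M0mmmm" with cond = AL (1110), D = 0, Vd = 0000, z = 0, M = 0, Vm = 0000 yields the word below. This is hand-derived from the pattern string, not a test case from this commit.

#include <cstdint>

// "cccc 11101 D 110111 dddd 101 z 11 M 0 mmmm" with cond=0b1110 (AL),
// D=0, Vd=0, z=0 (source is f32), M=0, Vm=0:
//   1110 11101 0 110111 0000 101 0 11 0 0 0000  ->  0xEEB70AC0
// z=0 routes to FPSingleToDouble in the translator, so this word should
// decode and disassemble along the lines of "vcvt.f64.f32 d0, s0".
constexpr uint32_t vcvt_f64_f32_d0_s0 = 0xEEB70AC0;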