From b5a86889cdc4cdc0179d9849947ba6bfe923d9f7 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 23 Aug 2016 22:04:46 +0100 Subject: [PATCH] Implement VCVT --- src/backend_x64/emit_x64.cpp | 284 ++++++++++++++++++ src/frontend/decoder/vfp2.h | 6 +- .../disassembler/disassembler_arm.cpp | 16 + src/frontend/ir/ir_emitter.cpp | 50 +++ src/frontend/ir/ir_emitter.h | 10 + src/frontend/ir/opcodes.inc | 14 +- .../translate/translate_arm/translate_arm.h | 4 + src/frontend/translate/translate_arm/vfp2.cpp | 66 ++++ 8 files changed, 447 insertions(+), 3 deletions(-) diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 4779afe0..cf9598a0 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -1211,6 +1211,13 @@ static void DefaultNaN64(BlockOfCode* code, X64Reg xmm_value) { code->SetJumpTarget(fixup); } +static void ZeroIfNaN64(BlockOfCode* code, X64Reg xmm_value) { + code->UCOMISD(xmm_value, R(xmm_value)); + auto fixup = code->J_CC(CC_NP); + code->MOVAPS(xmm_value, code->MFloatPositiveZero64()); + code->SetJumpTarget(fixup); +} + static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (XEmitter::*fn)(X64Reg, const OpArg&)) { IR::Value a = inst->GetArg(0); IR::Value b = inst->GetArg(1); @@ -1389,6 +1396,283 @@ void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) { FPThreeOp64(code, reg_alloc, block, inst, &XEmitter::SUBSD); } +void EmitX64::EmitFPSingleToDouble(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero32(code, result, gpr_scratch); + } + code->CVTSS2SD(result, R(result)); + if (block.location.FPSCR_FTZ()) { + FlushToZero64(code, result, gpr_scratch); + } + if (block.location.FPSCR_DN()) { + DefaultNaN64(code, result); + } +} + +void EmitX64::EmitFPDoubleToSingle(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + + X64Reg result = reg_alloc.UseDefRegister(a, inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero64(code, result, gpr_scratch); + } + code->CVTSD2SS(result, R(result)); + if (block.location.FPSCR_FTZ()) { + FlushToZero32(code, result, gpr_scratch); + } + if (block.location.FPSCR_DN()) { + DefaultNaN32(code, result); + } +} + +void EmitX64::EmitFPSingleToS32(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_towards_zero = inst->GetArg(1).GetU1(); + + X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + // ARM saturates on conversion; this differs from x64 which returns a sentinel value. + // Conversion to double is lossless, and allows for clamping. 
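+    // x64's CVT(T)SD2SI writes the "integer indefinite" value 0x80000000 for NaN and
+    // out-of-range inputs, whereas ARM saturates and converts NaN to zero; hence the
+    // ZeroIfNaN64 call and the clamp to the signed 32-bit range before the final conversion below.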
+ + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero32(code, from, gpr_scratch); + } + code->CVTSS2SD(from, R(from)); + // First time is to set flags + if (round_towards_zero) { + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } else { + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } + // Clamp to output range + ZeroIfNaN64(code, from); + code->MINSD(from, code->MFloatMaxS32()); + code->MAXSD(from, code->MFloatMinS32()); + // Second time is for real + if (round_towards_zero) { + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } else { + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } + code->MOVD_xmm(to, R(gpr_scratch)); +} + +void EmitX64::EmitFPSingleToU32(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_towards_zero = inst->GetArg(1).GetU1(); + + X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + // ARM saturates on conversion; this differs from x64 which returns a sentinel value. + // Conversion to double is lossless, and allows for accurate clamping. + // + // Since SSE2 doesn't provide an unsigned conversion, we shift the range as appropriate. + // + // FIXME: Inexact exception not correctly signalled with the below code + + if (block.location.FPSCR_RMode() != Arm::FPRoundingMode::RoundTowardsZero && !round_towards_zero) { + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero32(code, from, gpr_scratch); + } + code->CVTSS2SD(from, R(from)); + ZeroIfNaN64(code, from); + // Bring into SSE range + code->ADDSD(from, code->MFloatMinS32()); + // First time is to set flags + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Clamp to output range + code->MINSD(from, code->MFloatMaxS32()); + code->MAXSD(from, code->MFloatMinS32()); + // Actually convert + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Bring back into original range + code->ADD(32, R(gpr_scratch), Imm32(2147483648u)); + code->MOVQ_xmm(to, R(gpr_scratch)); + } else { + X64Reg xmm_mask = reg_alloc.ScratchRegister(any_xmm); + X64Reg gpr_mask = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero32(code, from, gpr_scratch); + } + code->CVTSS2SD(from, R(from)); + ZeroIfNaN64(code, from); + // Generate masks if out-of-signed-range + code->MOVAPS(xmm_mask, code->MFloatMaxS32()); + code->CMPLTSD(xmm_mask, R(from)); + code->MOVQ_xmm(R(gpr_mask), xmm_mask); + code->PAND(xmm_mask, code->MFloatMinS32()); + code->AND(32, R(gpr_mask), Imm32(2147483648u)); + // Bring into range if necessary + code->ADDSD(from, R(xmm_mask)); + // First time is to set flags + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Clamp to output range + code->MINSD(from, code->MFloatMaxS32()); + code->MAXSD(from, code->MFloatMinU32()); + // Actually convert + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Bring back into original range if necessary + code->ADD(32, R(gpr_scratch), R(gpr_mask)); + code->MOVQ_xmm(to, R(gpr_scratch)); + } +} + +void EmitX64::EmitFPDoubleToS32(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_towards_zero = inst->GetArg(1).GetU1(); + + X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + // ARM saturates on conversion; this differs from x64 which returns a sentinel value. 
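+    // Same convert/clamp/convert sequence as EmitFPSingleToS32 above, minus the widening
+    // CVTSS2SD: a double already represents every int32 exactly, so clamping against the
+    // signed 32-bit bounds is exact.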
+ + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero64(code, from, gpr_scratch); + } + // First time is to set flags + if (round_towards_zero) { + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } else { + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } + // Clamp to output range + ZeroIfNaN64(code, from); + code->MINSD(from, code->MFloatMaxS32()); + code->MAXSD(from, code->MFloatMinS32()); + // Second time is for real + if (round_towards_zero) { + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } else { + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + } + code->MOVD_xmm(to, R(gpr_scratch)); +} + +void EmitX64::EmitFPDoubleToU32(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_towards_zero = inst->GetArg(1).GetU1(); + + X64Reg from = reg_alloc.UseScratchRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + X64Reg xmm_scratch = reg_alloc.ScratchRegister(any_xmm); + + // ARM saturates on conversion; this differs from x64 which returns a sentinel value. + // TODO: Use VCVTPD2UDQ when AVX512VL is available. + // FIXME: Inexact exception not correctly signalled with the below code + + if (block.location.FPSCR_RMode() != Arm::FPRoundingMode::RoundTowardsZero && !round_towards_zero) { + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero64(code, from, gpr_scratch); + } + ZeroIfNaN64(code, from); + // Bring into SSE range + code->ADDSD(from, code->MFloatMinS32()); + // First time is to set flags + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Clamp to output range + code->MINSD(from, code->MFloatMaxS32()); + code->MAXSD(from, code->MFloatMinS32()); + // Actually convert + code->CVTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Bring back into original range + code->ADD(32, R(gpr_scratch), Imm32(2147483648u)); + code->MOVQ_xmm(to, R(gpr_scratch)); + } else { + X64Reg xmm_mask = reg_alloc.ScratchRegister(any_xmm); + X64Reg gpr_mask = reg_alloc.ScratchRegister(any_gpr); + + if (block.location.FPSCR_FTZ()) { + DenormalsAreZero64(code, from, gpr_scratch); + } + ZeroIfNaN64(code, from); + // Generate masks if out-of-signed-range + code->MOVAPS(xmm_mask, code->MFloatMaxS32()); + code->CMPLTSD(xmm_mask, R(from)); + code->MOVQ_xmm(R(gpr_mask), xmm_mask); + code->PAND(xmm_mask, code->MFloatMinS32()); + code->AND(32, R(gpr_mask), Imm32(2147483648u)); + // Bring into range if necessary + code->ADDSD(from, R(xmm_mask)); + // First time is to set flags + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Clamp to output range + code->MINSD(from, code->MFloatMaxS32()); + code->MAXSD(from, code->MFloatMinU32()); + // Actually convert + code->CVTTSD2SI(gpr_scratch, R(from)); // 32 bit gpr + // Bring back into original range if necessary + code->ADD(32, R(gpr_scratch), R(gpr_mask)); + code->MOVQ_xmm(to, R(gpr_scratch)); + } +} + +void EmitX64::EmitFPS32ToSingle(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_to_nearest = inst->GetArg(1).GetU1(); + ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); + + X64Reg from = reg_alloc.UseRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + code->MOVD_xmm(R(gpr_scratch), from); + code->CVTSI2SS(32, to, R(gpr_scratch)); +} + +void EmitX64::EmitFPU32ToSingle(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_to_nearest = inst->GetArg(1).GetU1(); + 
ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); + + X64Reg from = reg_alloc.UseRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + code->MOVD_xmm(R(gpr_scratch), from); + code->CVTSI2SS(64, to, R(gpr_scratch)); +} + +void EmitX64::EmitFPS32ToDouble(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_to_nearest = inst->GetArg(1).GetU1(); + ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); + + X64Reg from = reg_alloc.UseRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + code->MOVD_xmm(R(gpr_scratch), from); + code->CVTSI2SD(32, to, R(gpr_scratch)); +} + +void EmitX64::EmitFPU32ToDouble(IR::Block& block, IR::Inst* inst) { + IR::Value a = inst->GetArg(0); + bool round_to_nearest = inst->GetArg(1).GetU1(); + ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented"); + + X64Reg from = reg_alloc.UseRegister(a, any_xmm); + X64Reg to = reg_alloc.DefRegister(inst, any_xmm); + X64Reg gpr_scratch = reg_alloc.ScratchRegister(any_gpr); + + code->MOVD_xmm(R(gpr_scratch), from); + code->CVTSI2SD(64, to, R(gpr_scratch)); +} + + void EmitX64::EmitClearExclusive(IR::Block&, IR::Inst*) { code->MOV(8, MDisp(R15, offsetof(JitState, exclusive_state)), Imm8(0)); } diff --git a/src/frontend/decoder/vfp2.h b/src/frontend/decoder/vfp2.h index 3891af53..74eedb61 100644 --- a/src/frontend/decoder/vfp2.h +++ b/src/frontend/decoder/vfp2.h @@ -89,10 +89,12 @@ boost::optional&> DecodeVFP2(u32 instruction) { INST(&V::vfp2_VABS, "VABS", "cccc11101D110000dddd101z11M0mmmm"), INST(&V::vfp2_VNEG, "VNEG", "cccc11101D110001dddd101z01M0mmmm"), INST(&V::vfp2_VSQRT, "VSQRT", "cccc11101D110001dddd101z11M0mmmm"), + INST(&V::vfp2_VCVT_f_to_f, "VCVT (f32<->f64)", "cccc11101D110111dddd101z11M0mmmm"), + INST(&V::vfp2_VCVT_to_float, "VCVT (to float)", "cccc11101D111000dddd101zs1M0mmmm"), + INST(&V::vfp2_VCVT_to_u32, "VCVT (to u32)", "cccc11101D111100dddd101zr1M0mmmm"), + INST(&V::vfp2_VCVT_to_s32, "VCVT (to s32)", "cccc11101D111101dddd101zr1M0mmmm"), // VCMP // VCMPE - // VCVT - // VCVTR // Extension register load-store instructions INST(&V::vfp2_VPUSH, "VPUSH", "cccc11010D101101dddd101zvvvvvvvv"), diff --git a/src/frontend/disassembler/disassembler_arm.cpp b/src/frontend/disassembler/disassembler_arm.cpp index 3a4377e6..2b342db9 100644 --- a/src/frontend/disassembler/disassembler_arm.cpp +++ b/src/frontend/disassembler/disassembler_arm.cpp @@ -871,6 +871,22 @@ public: return Common::StringFromFormat("vsqrt%s.%s %s, %s", CondToString(cond), sz ? "f64" : "f32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str()); } + std::string vfp2_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) { + return Common::StringFromFormat("vcvt%s.%s.%s %s, %s", CondToString(cond), !sz ? "f64" : "f32", sz ? "f64" : "f32", FPRegStr(!sz, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str()); + } + + std::string vfp2_VCVT_to_float(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm) { + return Common::StringFromFormat("vcvt%s.%s.%s %s, %s", CondToString(cond), sz ? "f64" : "f32", is_signed ? "s32" : "u32", FPRegStr(sz, Vd, D).c_str(), FPRegStr(false, Vm, M).c_str()); + } + + std::string vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) { + return Common::StringFromFormat("vcvt%s%s.u32.%s %s, %s", round_towards_zero ? 
"" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str()); + } + + std::string vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) { + return Common::StringFromFormat("vcvt%s%s.s32.%s %s, %s", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D).c_str(), FPRegStr(sz, Vm, M).c_str()); + } + std::string vfp2_VPOP(Cond cond, bool D, size_t Vd, bool sz, Imm8 imm8) { return Common::StringFromFormat("vpop%s %s(+%u)", CondToString(cond), FPRegStr(sz, Vd, D).c_str(), imm8 >> (sz ? 1 : 0)); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 1fd5d5f4..58ad886a 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -406,6 +406,56 @@ IR::Value IREmitter::FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_ return Inst(IR::Opcode::FPSub64, {a, b}); } +IR::Value IREmitter::FPDoubleToSingle(const IR::Value& a, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPDoubleToSingle, {a}); +} + +IR::Value IREmitter::FPSingleToDouble(const IR::Value& a, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPSingleToDouble, {a}); +} + +IR::Value IREmitter::FPSingleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPSingleToS32, {a, Imm1(round_towards_zero)}); +} + +IR::Value IREmitter::FPSingleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPSingleToU32, {a, Imm1(round_towards_zero)}); +} + +IR::Value IREmitter::FPDoubleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPDoubleToS32, {a, Imm1(round_towards_zero)}); +} + +IR::Value IREmitter::FPDoubleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPDoubleToU32, {a, Imm1(round_towards_zero)}); +} + +IR::Value IREmitter::FPS32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPS32ToSingle, {a, Imm1(round_to_nearest)}); +} + +IR::Value IREmitter::FPU32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPU32ToSingle, {a, Imm1(round_to_nearest)}); +} + +IR::Value IREmitter::FPS32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPS32ToDouble, {a, Imm1(round_to_nearest)}); +} + +IR::Value IREmitter::FPU32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled) { + ASSERT(fpscr_controlled); + return Inst(IR::Opcode::FPU32ToDouble, {a, Imm1(round_to_nearest)}); +} + void IREmitter::ClearExlcusive() { Inst(IR::Opcode::ClearExclusive, {}); } diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 3de5512e..efadaa49 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -140,6 +140,16 @@ public: IR::Value FPSqrt64(const IR::Value& a); IR::Value FPSub32(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); IR::Value FPSub64(const IR::Value& a, const IR::Value& b, bool fpscr_controlled); + IR::Value FPDoubleToSingle(const IR::Value& a, bool fpscr_controlled); + IR::Value 
FPSingleToDouble(const IR::Value& a, bool fpscr_controlled); + IR::Value FPSingleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled); + IR::Value FPSingleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled); + IR::Value FPDoubleToS32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled); + IR::Value FPDoubleToU32(const IR::Value& a, bool round_towards_zero, bool fpscr_controlled); + IR::Value FPS32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled); + IR::Value FPU32ToSingle(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled); + IR::Value FPS32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled); + IR::Value FPU32ToDouble(const IR::Value& a, bool round_to_nearest, bool fpscr_controlled); void ClearExlcusive(); void SetExclusive(const IR::Value& vaddr, size_t byte_size); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index df01681e..192febeb 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -74,7 +74,7 @@ OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 ) OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 ) -// Floating-point +// Floating-point operations OPCODE(TransferToFP32, T::F32, T::U32 ) OPCODE(TransferToFP64, T::F64, T::U64 ) OPCODE(TransferFromFP32, T::U32, T::F32 ) @@ -94,6 +94,18 @@ OPCODE(FPSqrt64, T::F64, T::F64 OPCODE(FPSub32, T::F32, T::F32, T::F32 ) OPCODE(FPSub64, T::F64, T::F64, T::F64 ) +// Floating-point conversions +OPCODE(FPSingleToDouble, T::F64, T::F32 ) +OPCODE(FPDoubleToSingle, T::F32, T::F64 ) +OPCODE(FPSingleToU32, T::F32, T::F32, T::U1 ) +OPCODE(FPSingleToS32, T::F32, T::F32, T::U1 ) +OPCODE(FPDoubleToU32, T::F32, T::F64, T::U1 ) +OPCODE(FPDoubleToS32, T::F32, T::F64, T::U1 ) +OPCODE(FPU32ToSingle, T::F32, T::F32, T::U1 ) +OPCODE(FPS32ToSingle, T::F32, T::F32, T::U1 ) +OPCODE(FPU32ToDouble, T::F64, T::F32, T::U1 ) +OPCODE(FPS32ToDouble, T::F64, T::F32, T::U1 ) + // Memory access OPCODE(ClearExclusive, T::Void, ) OPCODE(SetExclusive, T::Void, T::U32, T::U8 ) diff --git a/src/frontend/translate/translate_arm/translate_arm.h b/src/frontend/translate/translate_arm/translate_arm.h index fde85f3d..798185e2 100644 --- a/src/frontend/translate/translate_arm/translate_arm.h +++ b/src/frontend/translate/translate_arm/translate_arm.h @@ -357,6 +357,10 @@ struct ArmTranslatorVisitor final { bool vfp2_VABS(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm); bool vfp2_VNEG(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm); bool vfp2_VSQRT(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm); + bool vfp2_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm); + bool vfp2_VCVT_to_float(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm); + bool vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm); + bool vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm); // Floating-point load-store instructions bool vfp2_VLDR(Cond cond, bool U, bool D, Reg n, size_t Vd, bool sz, Imm8 imm8); diff --git a/src/frontend/translate/translate_arm/vfp2.cpp b/src/frontend/translate/translate_arm/vfp2.cpp index 8dd864cd..43ca3d13 100644 --- a/src/frontend/translate/translate_arm/vfp2.cpp +++ b/src/frontend/translate/translate_arm/vfp2.cpp @@ -360,6 +360,72 @@ bool ArmTranslatorVisitor::vfp2_VSQRT(Cond cond, bool D, size_t Vd, 
bool sz, boo return true; } +bool ArmTranslatorVisitor::vfp2_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm) { + ExtReg d = ToExtReg(!sz, Vd, D); // Destination is of opposite size to source + ExtReg m = ToExtReg(sz, Vm, M); + // VCVT.F64.F32 + // VCVT.F32.F64
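+    // sz gives the source precision: sz == 0 is VCVT.F64.F32 (Sm -> Dd), sz == 1 is
+    // VCVT.F32.F64 (Dm -> Sd), which is why the destination register above is looked up
+    // with !sz.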
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? ir.FPDoubleToSingle(a, true)
+                      : ir.FPSingleToDouble(a, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+bool ArmTranslatorVisitor::vfp2_VCVT_to_float(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(sz, Vd, D);
+    ExtReg m = ToExtReg(false, Vm, M);
+    bool round_to_nearest = false;
+    // VCVT.F32.{S32,U32} <Sd>, <Sm>
+    // VCVT.F64.{S32,U32} <Dd>, <Sm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? is_signed
+                        ? ir.FPS32ToDouble(a, round_to_nearest, true)
+                        : ir.FPU32ToDouble(a, round_to_nearest, true)
+                      : is_signed
+                        ? ir.FPS32ToSingle(a, round_to_nearest, true)
+                        : ir.FPU32ToSingle(a, round_to_nearest, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(false, Vd, D);
+    ExtReg m = ToExtReg(sz, Vm, M);
+    // VCVT{,R}.U32.F32 <Sd>, <Sm>
+    // VCVT{,R}.U32.F64 <Sd>, <Dm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? ir.FPDoubleToU32(a, round_towards_zero, true)
+                      : ir.FPSingleToU32(a, round_towards_zero, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
+    ExtReg d = ToExtReg(false, Vd, D);
+    ExtReg m = ToExtReg(sz, Vm, M);
+    // VCVT{,R}.S32.F32 <Sd>, <Sm>
+    // VCVT{,R}.S32.F64 <Sd>, <Dm>
+    if (ConditionPassed(cond)) {
+        auto a = ir.GetExtendedRegister(m);
+        auto result = sz
+                      ? ir.FPDoubleToS32(a, round_towards_zero, true)
+                      : ir.FPSingleToS32(a, round_towards_zero, true);
+        ir.SetExtendedRegister(d, result);
+    }
+    return true;
+}
+
+
 bool ArmTranslatorVisitor::vfp2_VPOP(Cond cond, bool D, size_t Vd, bool sz, Imm8 imm8) {
     const ExtReg d = ToExtReg(sz, Vd, D);
     const size_t regs = sz ? imm8 >> 1 : imm8;