diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp index f38d1950..715f6cb3 100644 --- a/src/backend/x64/emit_x64_floating_point.cpp +++ b/src/backend/x64/emit_x64_floating_point.cpp @@ -50,6 +50,9 @@ constexpr u64 f64_nan = 0x7ff8000000000000u; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_smallest_normal = 0x0010000000000000u; +constexpr u64 f64_max_s16 = 0x40dfffc000000000u; // 32767 as a double +constexpr u64 f64_min_u16 = 0x0000000000000000u; // 0 as a double +constexpr u64 f64_max_u16 = 0x40efffe000000000u; // 65535 as a double constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double @@ -1262,7 +1265,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF); code.jmp(end, code.T_NEAR); code.SwitchToNearCode(); - } else { + } else if constexpr (isize == 32) { code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32)); if (unsigned_) { code.maxsd(src, code.MConst(xword, f64_min_u32)); @@ -1270,6 +1273,14 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } else { code.cvttsd2si(result.cvt32(), src); } + } else { + code.minsd(src, code.MConst(xword, unsigned_ ? 
f64_max_u16 : f64_max_s16)); + if (unsigned_) { + code.maxsd(src, code.MConst(xword, f64_min_u16)); + code.cvttsd2si(result, src); // 64 bit gpr + } else { + code.cvttsd2si(result.cvt32(), src); + } } ctx.reg_alloc.DefineValue(inst, result); @@ -1312,6 +1323,10 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode))); } +void EmitX64::EmitFPDoubleToFixedS16(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixed<64, false, 16>(code, ctx, inst); +} + void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<64, false, 32>(code, ctx, inst); } @@ -1320,6 +1335,10 @@ void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<64, false, 64>(code, ctx, inst); } +void EmitX64::EmitFPDoubleToFixedU16(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixed<64, true, 16>(code, ctx, inst); +} + void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<64, true, 32>(code, ctx, inst); } @@ -1328,6 +1347,10 @@ void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<64, true, 64>(code, ctx, inst); } +void EmitX64::EmitFPHalfToFixedS16(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixed<16, false, 16>(code, ctx, inst); +} + void EmitX64::EmitFPHalfToFixedS32(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<16, false, 32>(code, ctx, inst); } @@ -1336,6 +1359,10 @@ void EmitX64::EmitFPHalfToFixedS64(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<16, false, 64>(code, ctx, inst); } +void EmitX64::EmitFPHalfToFixedU16(EmitContext& ctx, IR::Inst* inst) { + EmitFPToFixed<16, true, 16>(code, ctx, inst); +} + void EmitX64::EmitFPHalfToFixedU32(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<16, true, 32>(code, ctx, inst); } @@ -1344,6 +1371,10 @@ void EmitX64::EmitFPHalfToFixedU64(EmitContext& ctx, IR::Inst* inst) { EmitFPToFixed<16, true, 64>(code, ctx, inst); } +void 
EmitX64::EmitFPSingleToFixedS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<32, false, 16>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, false, 32>(code, ctx, inst);
 }
@@ -1352,6 +1383,10 @@ void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, false, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPSingleToFixedU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixed<32, true, 16>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, true, 32>(code, ctx, inst);
 }
@@ -1360,6 +1395,46 @@ void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPToFixed<32, true, 64>(code, ctx, inst);
 }
 
+void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8();
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+
+    code.movsx(tmp, from);
+    code.cvtsi2ss(result, tmp);
+
+    if (fbits != 0) {
+        const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
+        code.mulss(result, code.MConst(xword, scale_factor));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8();
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+
+    code.movzx(tmp, from);
+    code.cvtsi2ss(result, tmp);
+
+    if (fbits != 0) {
+        const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
+        code.mulss(result, code.MConst(xword, scale_factor));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
@@ -1367,9 +1442,15 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
-    code.cvtsi2ss(result, from);
+    if (rounding_mode == ctx.FPCR().RMode()) {
+        code.cvtsi2ss(result, from);
+    } else {
+        ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
+        code.EnterStandardASIMD();
+        code.cvtsi2ss(result, from);
+        code.LeaveStandardASIMD();
+    }
 
     if (fbits != 0) {
         const u32 scale_factor = static_cast<u32>((127 - fbits) << 23);
@@ -1385,16 +1466,26 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
 
-    if (code.HasAVX512_Skylake()) {
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
-        code.vcvtusi2ss(result, result, from.cvt32());
+    const auto op = [&]{
+        if (code.HasAVX512_Skylake()) {
+            const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+            code.vcvtusi2ss(result, result, from.cvt32());
+        } else {
+            // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
+            const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
+            code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+            code.cvtsi2ss(result, from);
+        }
+    };
+
+    if (rounding_mode == ctx.FPCR().RMode()) {
+        op();
     } else {
-        // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
-        code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
-        code.cvtsi2ss(result, from);
+        ASSERT(rounding_mode == FP::RoundingMode::ToNearest_TieEven);
+        code.EnterStandardASIMD();
+        op();
+        code.LeaveStandardASIMD();
     }
 
     if (fbits != 0) {
@@ -1405,14 +1496,53 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8();
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+
+    code.movsx(tmp, from);
+    code.cvtsi2sd(result, tmp);
+
+    if (fbits != 0) {
+        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
+        code.mulsd(result, code.MConst(xword, scale_factor));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16();
+    const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8();
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+
+    code.movzx(tmp, from);
+    code.cvtsi2sd(result, tmp);
+
+    if (fbits != 0) {
+        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
+        code.mulsd(result, code.MConst(xword, scale_factor));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32();
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const size_t fbits = args[1].GetImmediateU8();
-    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
 
     code.cvtsi2sd(result, from);
 
@@ -1424,6 +1554,31 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
+    const size_t fbits = args[1].GetImmediateU8();
+    [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8()); // Not required
+
+    if (code.HasAVX512_Skylake()) {
+        const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
+        code.vcvtusi2sd(to, to, from.cvt32());
+    } else {
+        // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
+        const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
+        code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+        code.cvtsi2sd(to, from);
+    }
+
+    if (fbits != 0) {
+        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
+        code.mulsd(to, code.MConst(xword, scale_factor));
+    }
+
+    ctx.reg_alloc.DefineValue(inst, to);
+}
+
 void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
@@ -1462,32 +1617,6 @@ void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm();
-    const size_t fbits = args[1].GetImmediateU8();
-    const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
-
-    if (code.HasAVX512_Skylake()) {
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]);
-        code.vcvtusi2sd(to, to, from.cvt32());
-    } else {
-        // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
-        const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]);
-        code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
-        code.cvtsi2sd(to, from);
-    }
-
-    if (fbits != 0) {
-        const u64 scale_factor = static_cast<u64>((1023 - fbits) << 52);
-        code.mulsd(to, code.MConst(xword, scale_factor));
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
-}
-
 void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
diff --git a/src/frontend/A32/decoder/vfp.inc b/src/frontend/A32/decoder/vfp.inc
index 83dc4529..36283634 100644
--- a/src/frontend/A32/decoder/vfp.inc
+++ b/src/frontend/A32/decoder/vfp.inc
@@ -30,10 +30,10 @@ INST(vfp_VRINTR, "VRINTR", "cccc11101D110110dddd101z01M0mmmm") // VFPv5
 INST(vfp_VRINTZ, "VRINTZ", "cccc11101D110110dddd101z11M0mmmm") // VFPv5
 INST(vfp_VCVT_f_to_f, "VCVT (f32<->f64)", "cccc11101D110111dddd101z11M0mmmm") // VFPv2
 INST(vfp_VCVT_from_int, "VCVT (from int)", "cccc11101D111000dddd101zs1M0mmmm") // VFPv2
-//INST(vfp_VCVT_from_fixed, "VCVT (from fixed)", "cccc11101D11101Udddd101zx1i0vvvv") // VFPv3
+INST(vfp_VCVT_from_fixed, "VCVT (from fixed)", "cccc11101D11101Udddd101zx1i0vvvv") // VFPv3
 INST(vfp_VCVT_to_u32, "VCVT (to u32)", "cccc11101D111100dddd101zr1M0mmmm") // VFPv2
 INST(vfp_VCVT_to_s32, "VCVT (to s32)", "cccc11101D111101dddd101zr1M0mmmm") // VFPv2
-//INST(vfp_VCVT_to_fixed, "VCVT (to fixed)", 
"cccc11101D11111Udddd101zx1i0vvvv") // VFPv3 +INST(vfp_VCVT_to_fixed, "VCVT (to fixed)", "cccc11101D11111Udddd101zx1i0vvvv") // VFPv3 INST(vfp_VRINT_rm, "VRINT{A,N,P,M}", "111111101D1110mmdddd101z01M0mmmm") // VFPv5 INST(vfp_VCVT_rm, "VCVT{A,N,P,M}", "111111101D1111mmdddd101zU1M0mmmm") // VFPv5 diff --git a/src/frontend/A32/disassembler/disassembler_arm.cpp b/src/frontend/A32/disassembler/disassembler_arm.cpp index 087a697d..3a693303 100644 --- a/src/frontend/A32/disassembler/disassembler_arm.cpp +++ b/src/frontend/A32/disassembler/disassembler_arm.cpp @@ -1436,6 +1436,12 @@ public: return fmt::format("vcvt{}.{}.{} {}, {}", CondToString(cond), sz ? "f64" : "f32", is_signed ? "s32" : "u32", FPRegStr(sz, Vd, D), FPRegStr(false, Vm, M)); } + std::string vfp_VCVT_from_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) { + const size_t size = sx ? 32 : 16; + const size_t fbits = size - concatenate(imm4, i).ZeroExtend(); + return fmt::format("vcvt{}.{}.{}{} {}, {}, #{}", CondToString(cond), sz ? "f64" : "f32", U ? 'u' : 's', size, FPRegStr(sz, Vd, D), FPRegStr(sz, Vd, D), fbits); + } + std::string vfp_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) { return fmt::format("vcvt{}{}.u32.{} {}, {}", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D), FPRegStr(sz, Vm, M)); } @@ -1444,6 +1450,12 @@ public: return fmt::format("vcvt{}{}.s32.{} {}, {}", round_towards_zero ? "" : "r", CondToString(cond), sz ? "f64" : "f32", FPRegStr(false, Vd, D), FPRegStr(sz, Vm, M)); } + std::string vfp_VCVT_to_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) { + const size_t size = sx ? 32 : 16; + const size_t fbits = size - concatenate(imm4, i).ZeroExtend(); + return fmt::format("vcvt{}.{}{}.{} {}, {}, #{}", CondToString(cond), U ? 'u' : 's', size, sz ? 
"f64" : "f32", FPRegStr(sz, Vd, D), FPRegStr(sz, Vd, D), fbits);
+    }
+
     std::string vfp_VRINT_rm(bool D, size_t rm, size_t Vd, bool sz, bool M, size_t Vm) {
         return fmt::format("vrint{}.{} {}, {}", "anpm"[rm], sz ? "f64" : "f32", FPRegStr(sz, Vd, D), FPRegStr(sz, Vm, M));
     }
diff --git a/src/frontend/A32/translate/impl/translate_arm.h b/src/frontend/A32/translate/impl/translate_arm.h
index 2db6acfc..3a6f335d 100644
--- a/src/frontend/A32/translate/impl/translate_arm.h
+++ b/src/frontend/A32/translate/impl/translate_arm.h
@@ -427,8 +427,10 @@ struct ArmTranslatorVisitor final {
     bool vfp_VRINTZ(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp_VCVT_f_to_f(Cond cond, bool D, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp_VCVT_from_int(Cond cond, bool D, size_t Vd, bool sz, bool is_signed, bool M, size_t Vm);
+    bool vfp_VCVT_from_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4);
     bool vfp_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
     bool vfp_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm);
+    bool vfp_VCVT_to_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4);
     bool vfp_VRINT_rm(bool D, size_t rm, size_t Vd, bool sz, bool M, size_t Vm);
     bool vfp_VCVT_rm(bool D, size_t rm, size_t Vd, bool sz, bool U, bool M, size_t Vm);
 
diff --git a/src/frontend/A32/translate/impl/vfp.cpp b/src/frontend/A32/translate/impl/vfp.cpp
index 7a1bba69..8ef34d38 100644
--- a/src/frontend/A32/translate/impl/vfp.cpp
+++ b/src/frontend/A32/translate/impl/vfp.cpp
@@ -966,6 +966,38 @@ bool ArmTranslatorVisitor::vfp_VCVT_from_int(Cond cond, bool D, size_t Vd, bool
     return true;
 }
 
+// VCVT.F32.{S16,U16,S32,U32} <Sdm>, <Sdm>
+// VCVT.F64.{S16,U16,S32,U32} <Ddm>, <Ddm>
+bool ArmTranslatorVisitor::vfp_VCVT_from_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) {
+    if (!ConditionPassed(cond)) {
+        return true;
+    }
+
+    const size_t size = sx ? 32 : 16;
+    const size_t fbits = size - concatenate(imm4, i).ZeroExtend();
+
+    if (fbits > size) {
+        return UnpredictableInstruction();
+    }
+
+    const auto d = ToExtReg(sz, Vd, D);
+    const auto rounding_mode = FP::RoundingMode::ToNearest_TieEven;
+    const auto reg_d = ir.GetExtendedRegister(d);
+    const auto source = ir.LeastSignificant(size, reg_d);
+
+    if (sz) {
+        const auto result = U ? ir.FPUnsignedFixedToDouble(source, fbits, rounding_mode)
+                              : ir.FPSignedFixedToDouble(source, fbits, rounding_mode);
+        ir.SetExtendedRegister(d, result);
+    } else {
+        const auto result = U ? ir.FPUnsignedFixedToSingle(source, fbits, rounding_mode)
+                              : ir.FPSignedFixedToSingle(source, fbits, rounding_mode);
+        ir.SetExtendedRegister(d, result);
+    }
+
+    return true;
+}
+
 // VCVT{,R}.U32.F32 <Sd>, <Sm>
 // VCVT{,R}.U32.F64 <Sd>, <Dm>
 bool ArmTranslatorVisitor::vfp_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool sz, bool round_towards_zero, bool M, size_t Vm) {
@@ -998,6 +1030,42 @@ bool ArmTranslatorVisitor::vfp_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool sz
     return true;
 }
 
+// VCVT.{S16,U16,S32,U32}.F32 <Sdm>, <Sdm>
+// VCVT.{S16,U16,S32,U32}.F64 <Ddm>, <Ddm>
+bool ArmTranslatorVisitor::vfp_VCVT_to_fixed(Cond cond, bool D, bool U, size_t Vd, bool sz, bool sx, Imm<1> i, Imm<4> imm4) {
+    if (!ConditionPassed(cond)) {
+        return true;
+    }
+
+    const size_t size = sx ? 32 : 16;
+    const size_t fbits = size - concatenate(imm4, i).ZeroExtend();
+
+    if (fbits > size) {
+        return UnpredictableInstruction();
+    }
+
+    const auto d = ToExtReg(sz, Vd, D);
+    const auto rounding_mode = FP::RoundingMode::TowardsZero;
+    const auto reg_d = ir.GetExtendedRegister(d);
+
+    const auto result = [&]() -> IR::U16U32U64 {
+        if (sx) {
+            return U ? ir.FPToFixedU32(reg_d, fbits, rounding_mode)
+                     : ir.FPToFixedS32(reg_d, fbits, rounding_mode);
+        } else {
+            return U ? ir.FPToFixedU16(reg_d, fbits, rounding_mode)
+                     : ir.FPToFixedS16(reg_d, fbits, rounding_mode);
+        }
+    }();
+
+    if (sz) {
+        ir.SetExtendedRegister(d, U ? ir.ZeroExtendToLong(result) : ir.SignExtendToLong(result));
+    } else {
+        ir.SetExtendedRegister(d, U ? ir.ZeroExtendToWord(result) : ir.SignExtendToWord(result));
+    }
+    return true;
+}
+
 // VRINT{A,N,P,M}.F32 <Sd>, <Sm>
 // VRINT{A,N,P,M}.F64 <Dd>, <Dm>
 bool ArmTranslatorVisitor::vfp_VRINT_rm(bool D, size_t rm, size_t Vd, bool sz, bool M, size_t Vm) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 7d876d7c..7f19e4f8 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2150,6 +2150,24 @@ U16 IREmitter::FPSingleToHalf(const U32& a, FP::RoundingMode rounding) {
     return Inst<U16>(Opcode::FPSingleToHalf, a, Imm8(static_cast<u8>(rounding)));
 }
 
+U16 IREmitter::FPToFixedS16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 16);
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U16>(Opcode::FPHalfToFixedS16, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U16>(Opcode::FPSingleToFixedS16, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U16>(Opcode::FPDoubleToFixedS16, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+    }
+}
+
 U32 IREmitter::FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
 
@@ -2186,6 +2204,24 @@ U64 IREmitter::FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode r
     }
 }
 
+U16 IREmitter::FPToFixedU16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 16);
+
+    const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
+    const U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+
+    switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U16>(Opcode::FPHalfToFixedU16, a, fbits_imm, rounding_imm);
+    case Type::U32:
+        return Inst<U16>(Opcode::FPSingleToFixedU16, a, fbits_imm, rounding_imm);
+    case Type::U64:
+        return Inst<U16>(Opcode::FPDoubleToFixedU16, a, fbits_imm, rounding_imm);
+    default:
+        UNREACHABLE();
+    }
+}
+
 U32 IREmitter::FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
     ASSERT(fbits <= 32);
 
@@ -2222,13 +2258,15 @@ U64 IREmitter::FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode r
     }
 }
 
-U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U32 IREmitter::FPSignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPFixedS16ToSingle, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U32>(Opcode::FPFixedS32ToSingle, a, fbits_imm, rounding_imm);
     case Type::U64:
@@ -2238,13 +2276,15 @@ U32 IREmitter::FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::Rounding
     }
 }
 
-U32 IREmitter::FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U32 IREmitter::FPUnsignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U32>(Opcode::FPFixedU16ToSingle, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U32>(Opcode::FPFixedU32ToSingle, a, fbits_imm, rounding_imm);
     case Type::U64:
@@ -2254,13 +2294,15 @@ U32 IREmitter::FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::Roundi
     }
 }
 
-U64 IREmitter::FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U64 IREmitter::FPSignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPFixedS16ToDouble, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U64>(Opcode::FPFixedS32ToDouble, a, fbits_imm, rounding_imm);
     case Type::U64:
@@ -2270,13 +2312,15 @@ U64 IREmitter::FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::Rounding
     }
 }
 
-U64 IREmitter::FPUnsignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fbits <= (a.GetType() == Type::U32 ? 32 : 64));
+U64 IREmitter::FPUnsignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= (a.GetType() == Type::U16 ? 16 : (a.GetType() == Type::U32 ? 32 : 64)));
 
     const IR::U8 fbits_imm = Imm8(static_cast<u8>(fbits));
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
 
     switch (a.GetType()) {
+    case Type::U16:
+        return Inst<U64>(Opcode::FPFixedU16ToDouble, a, fbits_imm, rounding_imm);
     case Type::U32:
         return Inst<U64>(Opcode::FPFixedU32ToDouble, a, fbits_imm, rounding_imm);
     case Type::U64:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 163068c8..95051edb 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -338,14 +338,16 @@ public:
     U32 FPHalfToSingle(const U16& a, FP::RoundingMode rounding);
     U16 FPSingleToHalf(const U32& a, FP::RoundingMode rounding);
     U64 FPSingleToDouble(const U32& a, FP::RoundingMode rounding);
+    U16 FPToFixedS16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPToFixedS32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U64 FPToFixedS64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
+    U16 FPToFixedU16(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPToFixedU32(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
     U64 FPToFixedU64(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 
FPSignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding); - U32 FPUnsignedFixedToSingle(const U32U64& a, size_t fbits, FP::RoundingMode rounding); - U64 FPSignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding); - U64 FPUnsignedFixedToDouble(const U32U64& a, size_t fbits, FP::RoundingMode rounding); + U32 FPSignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding); + U32 FPUnsignedFixedToSingle(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding); + U64 FPSignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding); + U64 FPUnsignedFixedToDouble(const U16U32U64& a, size_t fbits, FP::RoundingMode rounding); U128 FPVectorAbs(size_t esize, const U128& a); U128 FPVectorAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index b6f35a8f..4314d5a3 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -560,24 +560,34 @@ OPCODE(FPSingleToDouble, U64, U32, OPCODE(FPSingleToHalf, U16, U32, U8 ) OPCODE(FPDoubleToHalf, U16, U64, U8 ) OPCODE(FPDoubleToSingle, U32, U64, U8 ) +OPCODE(FPDoubleToFixedS16, U16, U64, U8, U8 ) OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 ) OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 ) +OPCODE(FPDoubleToFixedU16, U16, U64, U8, U8 ) OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 ) OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 ) +OPCODE(FPHalfToFixedS16, U16, U16, U8, U8 ) OPCODE(FPHalfToFixedS32, U32, U16, U8, U8 ) OPCODE(FPHalfToFixedS64, U64, U16, U8, U8 ) +OPCODE(FPHalfToFixedU16, U16, U16, U8, U8 ) OPCODE(FPHalfToFixedU32, U32, U16, U8, U8 ) OPCODE(FPHalfToFixedU64, U64, U16, U8, U8 ) +OPCODE(FPSingleToFixedS16, U16, U32, U8, U8 ) OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 ) OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 ) +OPCODE(FPSingleToFixedU16, U16, U32, U8, U8 ) OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 ) OPCODE(FPSingleToFixedU64, U64, 
U32, U8, U8 ) +OPCODE(FPFixedU16ToSingle, U32, U16, U8, U8 ) +OPCODE(FPFixedS16ToSingle, U32, U16, U8, U8 ) +OPCODE(FPFixedU16ToDouble, U64, U16, U8, U8 ) +OPCODE(FPFixedS16ToDouble, U64, U16, U8, U8 ) OPCODE(FPFixedU32ToSingle, U32, U32, U8, U8 ) OPCODE(FPFixedS32ToSingle, U32, U32, U8, U8 ) OPCODE(FPFixedU32ToDouble, U64, U32, U8, U8 ) +OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 ) OPCODE(FPFixedU64ToDouble, U64, U64, U8, U8 ) OPCODE(FPFixedU64ToSingle, U32, U64, U8, U8 ) -OPCODE(FPFixedS32ToDouble, U64, U32, U8, U8 ) OPCODE(FPFixedS64ToDouble, U64, U64, U8, U8 ) OPCODE(FPFixedS64ToSingle, U32, U64, U8, U8 ) diff --git a/tests/A32/fuzz_arm.cpp b/tests/A32/fuzz_arm.cpp index 47a2bfac..6515c2cd 100644 --- a/tests/A32/fuzz_arm.cpp +++ b/tests/A32/fuzz_arm.cpp @@ -120,6 +120,7 @@ u32 GenRandomInst(u32 pc, bool is_last_inst) { // Incorrect Unicorn implementations "asimd_VRECPS", // Unicorn does not fuse the multiply and subtraction, resulting in being off by 1ULP. "asimd_VRSQRTS", // Unicorn does not fuse the multiply and subtraction, resulting in being off by 1ULP. + "vfp_VCVT_from_fixed", // Unicorn does not do round-to-nearest-even for this instruction correctly. }; for (const auto& [fn, bitstring] : list) {