ir: Add fpcr_controlled argument to FPVector{Equal,Greater,GreaterEqual}

This commit is contained in:
MerryMage 2020-06-20 00:01:10 +01:00
parent 1b3a70a83c
commit 656419286c
13 changed files with 102 additions and 53 deletions

View file

@ -71,8 +71,9 @@ bool A32EmitContext::IsSingleStep() const {
return Location().SingleStepping(); return Location().SingleStepping();
} }
FP::FPCR A32EmitContext::FPCR() const { FP::FPCR A32EmitContext::FPCR(bool fpcr_controlled) const {
return FP::FPCR{Location().FPSCR().Value()}; const FP::FPCR fpcr = FP::FPCR{Location().FPSCR().Value()};
return fpcr_controlled ? fpcr : fpcr.ASIMDStandardValue();
} }
A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_interface) A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig conf, A32::Jit* jit_interface)

View file

@ -30,7 +30,7 @@ struct A32EmitContext final : public EmitContext {
A32::LocationDescriptor Location() const; A32::LocationDescriptor Location() const;
bool IsSingleStep() const; bool IsSingleStep() const;
FP::FPCR FPCR() const override; FP::FPCR FPCR(bool fpcr_controlled = true) const override;
const A32::UserConfig& conf; const A32::UserConfig& conf;
}; };

View file

@ -185,7 +185,7 @@ void A32JitState::SetFpscr(u32 FPSCR) {
fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK; fpsr_nzcv = FPSCR & FPSCR_NZCV_MASK;
guest_MXCSR = 0x00001f80; guest_MXCSR = 0x00001f80;
asimd_MXCSR = 0x00001f80; asimd_MXCSR = 0x00009fc0;
// RMode // RMode
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000}; const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};

View file

@ -48,7 +48,7 @@ struct A32JitState {
// For internal use (See: BlockOfCode::RunCode) // For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80; u32 guest_MXCSR = 0x00001f80;
u32 asimd_MXCSR = 0x00001f80; u32 asimd_MXCSR = 0x00009fc0;
u32 save_host_MXCSR = 0; u32 save_host_MXCSR = 0;
s64 cycles_to_run = 0; s64 cycles_to_run = 0;
s64 cycles_remaining = 0; s64 cycles_remaining = 0;

View file

@ -48,8 +48,8 @@ bool A64EmitContext::IsSingleStep() const {
return Location().SingleStepping(); return Location().SingleStepping();
} }
FP::FPCR A64EmitContext::FPCR() const { FP::FPCR A64EmitContext::FPCR(bool fpcr_controlled) const {
return Location().FPCR(); return fpcr_controlled ? Location().FPCR() : Location().FPCR().ASIMDStandardValue();
} }
bool A64EmitContext::AccurateNaN() const { bool A64EmitContext::AccurateNaN() const {

View file

@ -27,7 +27,7 @@ struct A64EmitContext final : public EmitContext {
A64::LocationDescriptor Location() const; A64::LocationDescriptor Location() const;
bool IsSingleStep() const; bool IsSingleStep() const;
FP::FPCR FPCR() const override; FP::FPCR FPCR(bool fpcr_controlled = true) const override;
bool AccurateNaN() const override; bool AccurateNaN() const override;
const A64::UserConfig& conf; const A64::UserConfig& conf;

View file

@ -51,7 +51,7 @@ struct A64JitState {
// For internal use (See: BlockOfCode::RunCode) // For internal use (See: BlockOfCode::RunCode)
u32 guest_MXCSR = 0x00001f80; u32 guest_MXCSR = 0x00001f80;
u32 asimd_MXCSR = 0x00001f80; u32 asimd_MXCSR = 0x00009fc0;
u32 save_host_MXCSR = 0; u32 save_host_MXCSR = 0;
s64 cycles_to_run = 0; s64 cycles_to_run = 0;
s64 cycles_remaining = 0; s64 cycles_remaining = 0;

View file

@ -46,7 +46,7 @@ struct EmitContext {
size_t GetInstOffset(IR::Inst* inst) const; size_t GetInstOffset(IR::Inst* inst) const;
void EraseInstruction(IR::Inst* inst); void EraseInstruction(IR::Inst* inst);
virtual FP::FPCR FPCR() const = 0; virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
virtual bool AccurateNaN() const { return true; } virtual bool AccurateNaN() const { return true; }
RegAlloc& reg_alloc; RegAlloc& reg_alloc;

View file

@ -35,6 +35,11 @@ using namespace Xbyak::util;
namespace { namespace {
enum FpcrControlledArgument {
Present,
Absent,
};
template<size_t fsize, typename T> template<size_t fsize, typename T>
T ChooseOnFsize([[maybe_unused]] T f32, [[maybe_unused]] T f64) { T ChooseOnFsize([[maybe_unused]] T f32, [[maybe_unused]] T f64) {
static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64"); static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
@ -196,9 +201,9 @@ void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm result) {
} }
template<size_t fsize> template<size_t fsize>
void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) { void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
if (ctx.FPCR().FZ()) { if (fpcr.FZ()) {
if (ctx.FPCR().RMode() != FP::RoundingMode::TowardsMinusInfinity) { if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
code.movaps(tmp, GetNegativeZeroVector<fsize>(code)); code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
} else { } else {
code.xorps(tmp, tmp); code.xorps(tmp, tmp);
@ -383,16 +388,18 @@ void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lamb
} }
template<typename Lambda> template<typename Lambda>
void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Lambda lambda) { void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Lambda lambda, bool fpcr_controlled = true) {
const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda); const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
const u32 fpcr = ctx.FPCR(fpcr_controlled).Value();
#ifdef _WIN32 #ifdef _WIN32
constexpr u32 stack_space = 4 * 16; constexpr u32 stack_space = 4 * 16;
code.sub(rsp, stack_space + ABI_SHADOW_SPACE); code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM4.cvt32(), fpcr);
code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax); code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax);
#else #else
@ -401,7 +408,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM4.cvt32(), fpcr);
code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
#endif #endif
@ -418,7 +425,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
code.add(rsp, stack_space + ABI_SHADOW_SPACE); code.add(rsp, stack_space + ABI_SHADOW_SPACE);
} }
template<typename Lambda> template<FpcrControlledArgument fcarg = FpcrControlledArgument::Absent, typename Lambda>
void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
@ -427,7 +434,9 @@ void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, La
ctx.reg_alloc.EndOfAllocScope(); ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr); ctx.reg_alloc.HostCall(nullptr);
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda); const bool fpcr_controlled = fcarg == FpcrControlledArgument::Absent || args[2].GetImmediateU1();
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda, fpcr_controlled);
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
@ -486,6 +495,19 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
} }
template<typename Lambda>
void MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, Lambda lambda) {
const bool switch_mxcsr = ctx.FPCR(fpcr_controlled) != ctx.FPCR();
if (switch_mxcsr) {
code.EnterStandardASIMD();
lambda();
code.LeaveStandardASIMD();
} else {
lambda();
}
}
} // anonymous namespace } // anonymous namespace
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
@ -538,7 +560,7 @@ void EmitX64::EmitFPVectorDiv64(EmitContext& ctx, IR::Inst* inst) {
} }
void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpFallback(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& op1, const VectorArray<u16>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) { EmitThreeOpFallback<FpcrControlledArgument::Present>(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& op1, const VectorArray<u16>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
for (size_t i = 0; i < result.size(); i++) { for (size_t i = 0; i < result.size(); i++) {
result[i] = FP::FPCompareEQ(op1[i], op2[i], fpcr, fpsr) ? 0xFFFF : 0; result[i] = FP::FPCompareEQ(op1[i], op2[i], fpcr, fpsr) ? 0xFFFF : 0;
} }
@ -548,9 +570,13 @@ void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const bool fpcr_controlled = args[2].GetImmediateU1();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqps(a, b); code.cmpeqps(a, b);
});
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
@ -558,9 +584,13 @@ void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const bool fpcr_controlled = args[2].GetImmediateU1();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqpd(a, b); code.cmpeqpd(a, b);
});
ctx.reg_alloc.DefineValue(inst, a); ctx.reg_alloc.DefineValue(inst, a);
} }
@ -742,40 +772,56 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const bool fpcr_controlled = args[2].GetImmediateU1();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltps(b, a); code.cmpltps(b, a);
});
ctx.reg_alloc.DefineValue(inst, b); ctx.reg_alloc.DefineValue(inst, b);
} }
void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const bool fpcr_controlled = args[2].GetImmediateU1();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltpd(b, a); code.cmpltpd(b, a);
});
ctx.reg_alloc.DefineValue(inst, b); ctx.reg_alloc.DefineValue(inst, b);
} }
void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const bool fpcr_controlled = args[2].GetImmediateU1();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpleps(b, a); code.cmpleps(b, a);
});
ctx.reg_alloc.DefineValue(inst, b); ctx.reg_alloc.DefineValue(inst, b);
} }
void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const bool fpcr_controlled = args[2].GetImmediateU1();
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmplepd(b, a); code.cmplepd(b, a);
});
ctx.reg_alloc.DefineValue(inst, b); ctx.reg_alloc.DefineValue(inst, b);
} }
@ -791,7 +837,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
DenormalsAreZero<fsize>(code, ctx, {result, xmm_b}, mask); DenormalsAreZero<fsize>(code, ctx.FPCR(), {result, xmm_b}, mask);
if (code.HasAVX()) { if (code.HasAVX()) {
FCODE(vcmpeqp)(mask, result, xmm_b); FCODE(vcmpeqp)(mask, result, xmm_b);
@ -842,7 +888,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
const Xbyak::Xmm prev_xmm_b = xmm_b; const Xbyak::Xmm prev_xmm_b = xmm_b;
xmm_b = ctx.reg_alloc.ScratchXmm(); xmm_b = ctx.reg_alloc.ScratchXmm();
code.movaps(xmm_b, prev_xmm_b); code.movaps(xmm_b, prev_xmm_b);
DenormalsAreZero<fsize>(code, ctx, {result, xmm_b}, mask); DenormalsAreZero<fsize>(code, ctx.FPCR(), {result, xmm_b}, mask);
} }
// What we are doing here is handling the case when the inputs are differently signed zeros. // What we are doing here is handling the case when the inputs are differently signed zeros.

View file

@ -185,6 +185,8 @@ public:
FPCR stdvalue; FPCR stdvalue;
stdvalue.AHP(AHP()); stdvalue.AHP(AHP());
stdvalue.FZ16(FZ16()); stdvalue.FZ16(FZ16());
stdvalue.FZ(true);
stdvalue.DN(true);
return stdvalue; return stdvalue;
} }

View file

@ -2302,14 +2302,14 @@ U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b) {
UNREACHABLE(); UNREACHABLE();
} }
U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b) { U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
switch (esize) { switch (esize) {
case 16: case 16:
return Inst<U128>(Opcode::FPVectorEqual16, a, b); return Inst<U128>(Opcode::FPVectorEqual16, a, b, Imm1(fpcr_controlled));
case 32: case 32:
return Inst<U128>(Opcode::FPVectorEqual32, a, b); return Inst<U128>(Opcode::FPVectorEqual32, a, b, Imm1(fpcr_controlled));
case 64: case 64:
return Inst<U128>(Opcode::FPVectorEqual64, a, b); return Inst<U128>(Opcode::FPVectorEqual64, a, b, Imm1(fpcr_controlled));
} }
UNREACHABLE(); UNREACHABLE();
} }
@ -2336,22 +2336,22 @@ U128 IREmitter::FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fb
UNREACHABLE(); UNREACHABLE();
} }
U128 IREmitter::FPVectorGreater(size_t esize, const U128& a, const U128& b) { U128 IREmitter::FPVectorGreater(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
switch (esize) { switch (esize) {
case 32: case 32:
return Inst<U128>(Opcode::FPVectorGreater32, a, b); return Inst<U128>(Opcode::FPVectorGreater32, a, b, Imm1(fpcr_controlled));
case 64: case 64:
return Inst<U128>(Opcode::FPVectorGreater64, a, b); return Inst<U128>(Opcode::FPVectorGreater64, a, b, Imm1(fpcr_controlled));
} }
UNREACHABLE(); UNREACHABLE();
} }
U128 IREmitter::FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b) { U128 IREmitter::FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
switch (esize) { switch (esize) {
case 32: case 32:
return Inst<U128>(Opcode::FPVectorGreaterEqual32, a, b); return Inst<U128>(Opcode::FPVectorGreaterEqual32, a, b, Imm1(fpcr_controlled));
case 64: case 64:
return Inst<U128>(Opcode::FPVectorGreaterEqual64, a, b); return Inst<U128>(Opcode::FPVectorGreaterEqual64, a, b, Imm1(fpcr_controlled));
} }
UNREACHABLE(); UNREACHABLE();
} }

View file

@ -347,11 +347,11 @@ public:
U128 FPVectorAbs(size_t esize, const U128& a); U128 FPVectorAbs(size_t esize, const U128& a);
U128 FPVectorAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorAdd(size_t esize, const U128& a, const U128& b);
U128 FPVectorDiv(size_t esize, const U128& a, const U128& b); U128 FPVectorDiv(size_t esize, const U128& a, const U128& b);
U128 FPVectorEqual(size_t esize, const U128& a, const U128& b); U128 FPVectorEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
U128 FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); U128 FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
U128 FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); U128 FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
U128 FPVectorGreater(size_t esize, const U128& a, const U128& b); U128 FPVectorGreater(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b); U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
U128 FPVectorMax(size_t esize, const U128& a, const U128& b); U128 FPVectorMax(size_t esize, const U128& a, const U128& b);
U128 FPVectorMin(size_t esize, const U128& a, const U128& b); U128 FPVectorMin(size_t esize, const U128& a, const U128& b);
U128 FPVectorMul(size_t esize, const U128& a, const U128& b); U128 FPVectorMul(size_t esize, const U128& a, const U128& b);

View file

@ -584,17 +584,17 @@ OPCODE(FPVectorAdd32, U128, U128
OPCODE(FPVectorAdd64, U128, U128, U128 ) OPCODE(FPVectorAdd64, U128, U128, U128 )
OPCODE(FPVectorDiv32, U128, U128, U128 ) OPCODE(FPVectorDiv32, U128, U128, U128 )
OPCODE(FPVectorDiv64, U128, U128, U128 ) OPCODE(FPVectorDiv64, U128, U128, U128 )
OPCODE(FPVectorEqual16, U128, U128, U128 ) OPCODE(FPVectorEqual16, U128, U128, U128, U1 )
OPCODE(FPVectorEqual32, U128, U128, U128 ) OPCODE(FPVectorEqual32, U128, U128, U128, U1 )
OPCODE(FPVectorEqual64, U128, U128, U128 ) OPCODE(FPVectorEqual64, U128, U128, U128, U1 )
OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 ) OPCODE(FPVectorFromSignedFixed32, U128, U128, U8, U8 )
OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 ) OPCODE(FPVectorFromSignedFixed64, U128, U128, U8, U8 )
OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 ) OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8 )
OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 ) OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8 )
OPCODE(FPVectorGreater32, U128, U128, U128 ) OPCODE(FPVectorGreater32, U128, U128, U128, U1 )
OPCODE(FPVectorGreater64, U128, U128, U128 ) OPCODE(FPVectorGreater64, U128, U128, U128, U1 )
OPCODE(FPVectorGreaterEqual32, U128, U128, U128 ) OPCODE(FPVectorGreaterEqual32, U128, U128, U128, U1 )
OPCODE(FPVectorGreaterEqual64, U128, U128, U128 ) OPCODE(FPVectorGreaterEqual64, U128, U128, U128, U1 )
OPCODE(FPVectorMax32, U128, U128, U128 ) OPCODE(FPVectorMax32, U128, U128, U128 )
OPCODE(FPVectorMax64, U128, U128, U128 ) OPCODE(FPVectorMax64, U128, U128, U128 )
OPCODE(FPVectorMin32, U128, U128, U128 ) OPCODE(FPVectorMin32, U128, U128, U128 )