emit_x64_vector_floating_point: Add fpcr_controlled argument to all IR instructions

parent 33a81dae68, commit c836b389c8

4 changed files with 339 additions and 317 deletions
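Overview of the change, as reflected in the hunks below: every vector floating-point IR instruction gains an explicit fpcr_controlled flag as a trailing U1 immediate, replacing the earlier FpcrControlledArgument::Present/Absent template switch. The backend decodes the immediate per instruction and wraps FPCR-sensitive code in MaybeStandardFPSCRValue; the IREmitter helpers default the new parameter to true so existing call sites compile unchanged. A before/after sketch at the IR level (the false case appears to select the standard FPSCR value, an inference from the helper names rather than something the diff states):

    // Before: no explicit flag on the instruction
    //   ir.FPVectorSqrt(32, a);
    // After: the flag is an IR immediate; true is the default
    //   ir.FPVectorSqrt(32, a, true);   // behaviour follows the guest FPCR
    //   ir.FPVectorSqrt(32, a, false);  // presumably: use the standard FPSCR value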
@@ -48,11 +48,6 @@ T ChooseOnFsize([[maybe_unused]] T f32, [[maybe_unused]] T f64) {
 
 #define FCODE(NAME) (code.*ChooseOnFsize<fsize>(&Xbyak::CodeGenerator::NAME##s, &Xbyak::CodeGenerator::NAME##d))
 
-enum FpcrControlledArgument {
-    Present,
-    Absent,
-};
-
 template<typename Lambda>
 void MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, Lambda lambda) {
     const bool switch_mxcsr = ctx.FPCR(fpcr_controlled) != ctx.FPCR();
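For context, MaybeStandardFPSCRValue (whose first line is visible above) runs the emitted code either under the block's current MXCSR or under an alternate configuration, and only pays for the swap when the instruction's effective FPCR differs from the block's. A minimal sketch of the idea; the EnterStandardASIMD/LeaveStandardASIMD helper names are assumptions for illustration, not confirmed by this diff:

    template<typename Lambda>
    void MaybeStandardFPSCRValue(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, Lambda lambda) {
        // Swap MXCSR only if this instruction's FPCR differs from the block's FPCR.
        const bool switch_mxcsr = ctx.FPCR(fpcr_controlled) != ctx.FPCR();
        if (switch_mxcsr) {
            code.EnterStandardASIMD();   // assumed helper: load the alternate MXCSR
            lambda();
            code.LeaveStandardASIMD();   // assumed helper: restore the block's MXCSR
        } else {
            lambda();
        }
    }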
@@ -97,7 +92,7 @@ private:
 };
 
 template<size_t fsize, size_t nargs, typename NaNHandler>
-void HandleNaNs(BlockOfCode& code, EmitContext& ctx, std::array<Xbyak::Xmm, nargs + 1> xmms, const Xbyak::Xmm& nan_mask, NaNHandler nan_handler) {
+void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::array<Xbyak::Xmm, nargs + 1> xmms, const Xbyak::Xmm& nan_mask, NaNHandler nan_handler) {
     static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
 
     if (code.HasSSE41()) {
@@ -128,7 +123,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, std::array<Xbyak::Xmm, narg
             code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], xmms[i]);
         }
         code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
-        code.mov(code.ABI_PARAM2, ctx.FPCR().Value());
+        code.mov(code.ABI_PARAM2, ctx.FPCR(fpcr_controlled).Value());
 
         code.CallFunction(nan_handler);
 
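The hunk above shows HandleNaNs' slow path marshalling for a host call: ABI_PARAM1 points at the spilled XMM values and ABI_PARAM2 now carries the per-instruction FPCR rather than the block's. A sketch of the callback shape this marshalling implies; the exact typedef lives in NaNHandler and is not part of this diff:

    // Hypothetical illustration of the function reached via code.CallFunction(nan_handler):
    // values[0] is the result lane array, values[1..nargs] are the operands.
    template<typename FPT, size_t nargs>
    void ExampleNaNHandler(std::array<VectorArray<FPT>, nargs + 1>& values, FP::FPCR fpcr) {
        // a default handler would quieten/propagate NaNs per lane here
    }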
@@ -282,32 +277,35 @@ struct PairedLowerIndexer {
     }
 };
 
-template<size_t fsize, template<typename> class Indexer, typename Function>
+template<size_t fsize, template<typename> class Indexer, size_t fpcr_controlled_arg_index = 1, typename Function>
 void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, typename NaNHandler<fsize, Indexer, 2>::function_type nan_handler = NaNHandler<fsize, Indexer, 2>::GetDefault()) {
     static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
 
-    if (!ctx.AccurateNaN() || ctx.FPCR().DN()) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
+
+    if (!ctx.AccurateNaN() || ctx.FPCR(fpcr_controlled).DN()) {
         Xbyak::Xmm result;
 
         if constexpr (std::is_member_function_pointer_v<Function>) {
             result = ctx.reg_alloc.UseScratchXmm(args[0]);
-            (code.*fn)(result);
+            MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+                (code.*fn)(result);
+            });
         } else {
             const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
             result = ctx.reg_alloc.ScratchXmm();
-            fn(result, xmm_a);
+            MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+                fn(result, xmm_a);
+            });
         }
 
-        ForceToDefaultNaN<fsize>(code, ctx.FPCR(), result);
+        ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
     }
 
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
     const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
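The new fpcr_controlled_arg_index template parameter records where the U1 immediate sits in the instruction's argument list. For a plain two-operand op it is args[1], the default; ops that also carry other immediates pass their own index. Layouts taken from the opcode table at the end of this commit:

    //   FPVectorSqrt32:     U128 operand, U1 fpcr_controlled                       -> index 1
    //   FPVectorRoundInt32: U128 operand, U8 rounding, U1 exact, U1 fpcr_controlled -> index 3
    EmitTwoOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, fn);     // reads args[1]
    EmitTwoOpVectorOperation<32, DefaultIndexer, 3>(code, ctx, inst, fn);  // reads args[3]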
@@ -326,17 +324,17 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
         FCODE(cmpunordp)(nan_mask, nan_mask);
     }
 
-    HandleNaNs<fsize, 1>(code, ctx, {result, xmm_a}, nan_mask, nan_handler);
+    HandleNaNs<fsize, 1>(code, ctx, fpcr_controlled, {result, xmm_a}, nan_mask, nan_handler);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-template<size_t fsize, template<typename> class Indexer, FpcrControlledArgument fcarg = FpcrControlledArgument::Absent, typename Function>
+template<size_t fsize, template<typename> class Indexer, typename Function>
 void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn, typename NaNHandler<fsize, Indexer, 3>::function_type nan_handler = NaNHandler<fsize, Indexer, 3>::GetDefault()) {
     static_assert(fsize == 32 || fsize == 64, "fsize must be either 32 or 64");
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const bool fpcr_controlled = fcarg == FpcrControlledArgument::Absent || args[2].GetImmediateU1();
+    const bool fpcr_controlled = args[2].GetImmediateU1();
 
     if (!ctx.AccurateNaN() || ctx.FPCR(fpcr_controlled).DN()) {
         const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -373,17 +371,17 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     }
     FCODE(cmpunordp)(nan_mask, result);
 
-    HandleNaNs<fsize, 2>(code, ctx, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
+    HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-template<FpcrControlledArgument fcarg = FpcrControlledArgument::Absent, typename Lambda>
+template<size_t fpcr_controlled_arg_index = 1, typename Lambda>
 void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const bool fpcr_controlled = fcarg == FpcrControlledArgument::Absent || args[1].GetImmediateU1();
+    const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
     const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
     ctx.reg_alloc.EndOfAllocScope();
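EmitTwoOpFallback converts the capture-less lambda into a plain function pointer (via mp::equivalent_function_type) and invokes it on the host with the decoded FPCR and a reference to the guest FPSR. Typical usage, exactly as it appears later in this diff for the reciprocal estimate:

    EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand,
                                          FP::FPCR fpcr, FP::FPSR& fpsr) {
        for (size_t i = 0; i < result.size(); i++) {
            result[i] = FP::FPRecipEstimate<FPT>(operand[i], fpcr, fpsr);
        }
    });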
@@ -406,7 +404,7 @@ void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lamb
 }
 
 template<typename Lambda>
-void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Lambda lambda, bool fpcr_controlled = true) {
+void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Lambda lambda, bool fpcr_controlled) {
     const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
 
     const u32 fpcr = ctx.FPCR(fpcr_controlled).Value();
@@ -443,7 +441,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
     code.add(rsp, stack_space + ABI_SHADOW_SPACE);
 }
 
-template<FpcrControlledArgument fcarg = FpcrControlledArgument::Absent, typename Lambda>
+template<typename Lambda>
 void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
@@ -452,7 +450,7 @@ void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, La
     ctx.reg_alloc.EndOfAllocScope();
     ctx.reg_alloc.HostCall(nullptr);
 
-    const bool fpcr_controlled = fcarg == FpcrControlledArgument::Absent || args[2].GetImmediateU1();
+    const bool fpcr_controlled = args[2].GetImmediateU1();
 
     EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda, fpcr_controlled);
 
@@ -460,7 +458,7 @@ void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, La
 }
 
 template<typename Lambda>
-void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Xbyak::Xmm arg3, Lambda lambda) {
+void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak::Xmm result, Xbyak::Xmm arg1, Xbyak::Xmm arg2, Xbyak::Xmm arg3, Lambda lambda, bool fpcr_controlled) {
     const auto fn = static_cast<mp::equivalent_function_type<Lambda>*>(lambda);
 
 #ifdef _WIN32
@@ -470,7 +468,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
     code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]);
-    code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR().Value());
+    code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value());
     code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax);
 #else
@@ -480,7 +478,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
-    code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR().Value());
+    code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value());
     code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
 #endif
 
@@ -501,6 +499,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
 template<typename Lambda>
 void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const bool fpcr_controlled = args[3].GetImmediateU1();
     const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
     const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]);
     const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(args[2]);
@@ -508,7 +507,7 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
     ctx.reg_alloc.EndOfAllocScope();
     ctx.reg_alloc.HostCall(nullptr);
 
-    EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, arg3, lambda);
+    EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, arg3, lambda, fpcr_controlled);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
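The Win64/System V split above exists because the four-op fallback needs six host arguments. A hypothetical example of the host-side signature the marshalling implies (the real lambdas are defined per instruction): pointers to the four vectors, then the FPCR value, then a pointer to the accumulated FPSR exception bits. On Win64 only four arguments travel in registers, so the last two are stored to the stack (the qword moves above); on System V they ride in ABI_PARAM5/ABI_PARAM6.

    // Hypothetical example signature, for illustration only:
    void FourOpFallback(VectorArray<u64>& result, const VectorArray<u64>& arg1,
                        const VectorArray<u64>& arg2, const VectorArray<u64>& arg3,
                        FP::FPCR fpcr, FP::FPSR& fpsr);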
@@ -549,11 +548,11 @@ void EmitX64::EmitFPVectorAbs64(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<32, DefaultIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::addps);
+    EmitThreeOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::addps);
 }
 
 void EmitX64::EmitFPVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<64, DefaultIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
+    EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
 }
 
 void EmitX64::EmitFPVectorDiv32(EmitContext& ctx, IR::Inst* inst) {
@@ -565,7 +564,7 @@ void EmitX64::EmitFPVectorDiv64(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpFallback<FpcrControlledArgument::Present>(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& op1, const VectorArray<u16>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
+    EmitThreeOpFallback(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& op1, const VectorArray<u16>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
         for (size_t i = 0; i < result.size(); i++) {
             result[i] = FP::FPCompareEQ(op1[i], op2[i], fpcr, fpsr) ? 0xFFFF : 0;
         }
@@ -605,13 +604,15 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
+    const bool fpcr_controlled = args[3].GetImmediateU1();
+    ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
 
-    code.cvtdq2ps(xmm, xmm);
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+        code.cvtdq2ps(xmm, xmm);
 
-    if (fbits != 0) {
-        code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
-    }
+        if (fbits != 0) {
+            code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
+        }
+    });
 
     ctx.reg_alloc.DefineValue(inst, xmm);
 }
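The scale constant in the hunk above encodes 2^-fbits directly as an IEEE-754 bit pattern: with a zero mantissa field, a single-precision float equals 2^(E - 127), so an exponent field of 127 - fbits is exactly 2^-fbits. The 64-bit variants below use 1023 - fbits shifted into bit 52 for the same reason. A small self-contained check:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
        const int fbits = 8;
        const std::uint32_t bits = static_cast<std::uint32_t>(127 - fbits) << 23;
        float f;
        std::memcpy(&f, &bits, sizeof(f));  // reinterpret the bit pattern as a float
        assert(f == 1.0f / 256.0f);         // 2^-8
    }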
@@ -621,8 +622,10 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
+    const bool fpcr_controlled = args[3].GetImmediateU1();
+    ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
 
-    if (code.HasAVX512_Skylake()) {
-        code.vcvtqq2pd(xmm, xmm);
-    } else if (code.HasSSE41()) {
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+        if (code.HasAVX512_Skylake()) {
+            code.vcvtqq2pd(xmm, xmm);
+        } else if (code.HasSSE41()) {
@@ -660,6 +663,7 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
-    if (fbits != 0) {
-        code.mulpd(xmm, GetVectorOf<64>(code, static_cast<u64>(1023 - fbits) << 52));
-    }
+        if (fbits != 0) {
+            code.mulpd(xmm, GetVectorOf<64>(code, static_cast<u64>(1023 - fbits) << 52));
+        }
+    });
 
     ctx.reg_alloc.DefineValue(inst, xmm);
 }
@@ -669,8 +673,10 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
+    const bool fpcr_controlled = args[3].GetImmediateU1();
+    ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
 
-    if (code.HasAVX512_Skylake()) {
-        code.vcvtudq2ps(xmm, xmm);
-    } else {
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+        if (code.HasAVX512_Skylake()) {
+            code.vcvtudq2ps(xmm, xmm);
+        } else {
@@ -704,9 +710,10 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
-        code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
-    }
+            code.mulps(xmm, GetVectorOf<32>(code, static_cast<u32>(127 - fbits) << 23));
+        }
 
-    if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-        code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
-    }
+        if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
+            code.pand(xmm, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF));
+        }
+    });
 
     ctx.reg_alloc.DefineValue(inst, xmm);
 }
@@ -716,8 +723,10 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
     const int fbits = args[1].GetImmediateU8();
     const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    ASSERT(rounding_mode == ctx.FPCR().RMode());
+    const bool fpcr_controlled = args[3].GetImmediateU1();
+    ASSERT(rounding_mode == ctx.FPCR(fpcr_controlled).RMode());
 
-    if (code.HasAVX512_Skylake()) {
-        code.vcvtuqq2pd(xmm, xmm);
-    } else {
+    MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+        if (code.HasAVX512_Skylake()) {
+            code.vcvtuqq2pd(xmm, xmm);
+        } else {
@@ -768,9 +777,10 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
-        code.mulpd(xmm, GetVectorOf<64>(code, static_cast<u64>(1023 - fbits) << 52));
-    }
+            code.mulpd(xmm, GetVectorOf<64>(code, static_cast<u64>(1023 - fbits) << 52));
+        }
 
-    if (ctx.FPCR().RMode() == FP::RoundingMode::TowardsMinusInfinity) {
-        code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
-    }
+        if (ctx.FPCR(fpcr_controlled).RMode() == FP::RoundingMode::TowardsMinusInfinity) {
+            code.pand(xmm, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF));
+        }
+    });
 
     ctx.reg_alloc.DefineValue(inst, xmm);
 }
@@ -951,11 +961,11 @@ void EmitX64::EmitFPVectorMin64(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorMul32(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<32, DefaultIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::mulps);
+    EmitThreeOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::mulps);
 }
 
 void EmitX64::EmitFPVectorMul64(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<64, DefaultIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::mulpd);
+    EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::mulpd);
 }
 
 template<size_t fsize>
@@ -972,6 +982,8 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     if (code.HasFMA() && code.HasAVX()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
+        const bool fpcr_controlled = args[3].GetImmediateU1();
+
         const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
         const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
         const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
@@ -980,6 +992,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
         Xbyak::Label end, fallback;
 
+        MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
         code.movaps(result, xmm_a);
         FCODE(vfmadd231p)(result, xmm_b, xmm_c);
 
@@ -989,12 +1002,13 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         code.vptest(tmp, tmp);
         code.jnz(fallback, code.T_NEAR);
         code.L(end);
+        });
 
         code.SwitchToFarCode();
         code.L(fallback);
         code.sub(rsp, 8);
         ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
-        EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn);
+        EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, xmm_a, xmm_b, xmm_c, fallback_fn, fpcr_controlled);
         ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
         code.add(rsp, 8);
         code.jmp(end, code.T_NEAR);
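The FMA path above emits the fused multiply-add inline and only branches to a far-code block, which re-runs the operation through the soft-float fallback with the same fpcr_controlled, when a lane needs special handling. A scalar analogue of the pattern, purely illustrative (SoftFloatFma is a hypothetical stand-in for fallback_fn):

    #include <cmath>

    float SoftFloatFma(float a, float b, float c);  // hypothetical slow path with full FPCR semantics

    float FusedMulAdd(float a, float b, float c) {
        const float fast = std::fma(a, b, c);  // cheap inline path (vfmadd231p above)
        if (!std::isnan(fast)) {
            return fast;                        // common case: stay on the fast path
        }
        return SoftFloatFma(a, b, c);           // rare case: the far-code fallback
    }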
@@ -1025,13 +1039,15 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
     using FPT = mp::unsigned_integer_of_size<fsize>;
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const bool fpcr_controlled = args[2].GetImmediateU1();
 
-    if (ctx.FPCR().DN() && code.HasAVX()) {
+    if (ctx.FPCR(fpcr_controlled).DN() && code.HasAVX()) {
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);
         const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
         const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm();
 
+        MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
         FCODE(vcmpunordp)(xmm0, result, operand);
         FCODE(vxorp)(twos, result, operand);
         FCODE(mulp)(result, operand);
@@ -1041,6 +1057,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
         FCODE(orp)(twos, GetVectorOf<fsize, false, 0, 2>(code));
         FCODE(andnp)(xmm0, tmp);
         FCODE(blendvp)(result, twos);
+        });
 
         ctx.reg_alloc.DefineValue(inst, result);
         return;
@@ -1071,7 +1088,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
         }
     );
 
-    HandleNaNs<fsize, 2>(code, ctx, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
+    HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -1118,15 +1135,15 @@ void EmitX64::EmitFPVectorNeg64(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<32, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
+    EmitThreeOpVectorOperation<32, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddps);
 }
 
 void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<64, PairedIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
+    EmitThreeOpVectorOperation<64, PairedIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::haddpd);
 }
 
 void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<32, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
+    EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
         const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
         code.xorps(zero, zero);
         code.punpcklqdq(result, xmm_b);
@@ -1135,7 +1152,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<64, PairedLowerIndexer, FpcrControlledArgument::Present>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
+    EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
         const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
         code.xorps(zero, zero);
         code.punpcklqdq(result, xmm_b);
@@ -1145,7 +1162,7 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
 
 template<typename FPT>
 static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-    EmitTwoOpFallback<FpcrControlledArgument::Present>(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
+    EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
         for (size_t i = 0; i < result.size(); i++) {
             result[i] = FP::FPRecipEstimate<FPT>(operand[i], fpcr, fpsr);
         }
@@ -1211,7 +1228,7 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         }
     }
 
-    EmitThreeOpFallback<FpcrControlledArgument::Present>(code, ctx, inst, fallback_fn);
+    EmitThreeOpFallback(code, ctx, inst, fallback_fn);
 }
 
 void EmitX64::EmitFPVectorRecipStepFused16(EmitContext& ctx, IR::Inst* inst) {
@@ -1250,7 +1267,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         }
     }();
 
-    EmitTwoOpVectorOperation<fsize, DefaultIndexer>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
+    EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
         FCODE(roundp)(result, xmm_a, round_imm);
     });
 
@@ -1287,7 +1304,7 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         mp::cartesian_product<rounding_list, exact_list>{}
     );
 
-    EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
+    EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
 }
 
 void EmitX64::EmitFPVectorRoundInt16(EmitContext& ctx, IR::Inst* inst) {
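Both round-to-integral paths above now read the flag from argument index 3, matching the widened IR layout (operand, rounding, exact, fpcr_controlled) introduced by this commit. Taken verbatim from the hunks above, the vector path and the host fallback agree on where the U1 immediate lives:

    EmitTwoOpVectorOperation<fsize, DefaultIndexer, 3>(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
        FCODE(roundp)(result, xmm_a, round_imm);
    });
    EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));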
@@ -1304,7 +1321,7 @@ void EmitX64::EmitFPVectorRoundInt64(EmitContext& ctx, IR::Inst* inst) {
 
 template<typename FPT>
 static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
-    EmitTwoOpFallback<FpcrControlledArgument::Present>(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
+    EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
         for (size_t i = 0; i < result.size(); i++) {
             result[i] = FP::FPRSqrtEstimate<FPT>(operand[i], fpcr, fpsr);
         }
@@ -1380,7 +1397,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         }
     }
 
-    EmitThreeOpFallback<FpcrControlledArgument::Present>(code, ctx, inst, fallback_fn);
+    EmitThreeOpFallback(code, ctx, inst, fallback_fn);
 }
 
 void EmitX64::EmitFPVectorRSqrtStepFused16(EmitContext& ctx, IR::Inst* inst) {
@@ -1408,11 +1425,11 @@ void EmitX64::EmitFPVectorSqrt64(EmitContext& ctx, IR::Inst* inst) {
 }
 
 void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<32, DefaultIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::subps);
+    EmitThreeOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subps);
 }
 
 void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) {
-    EmitThreeOpVectorOperation<64, DefaultIndexer, FpcrControlledArgument::Present>(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
+    EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
 }
 
 template<size_t fsize, bool unsigned_>
@@ -1421,6 +1438,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 
     const size_t fbits = inst->GetArg(1).GetU8();
     const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(2).GetU8());
+    const bool fpcr_controlled = inst->GetArg(3).GetU1();
 
     // TODO: AVX512 implementation
 
@@ -1430,6 +1448,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
 
+        MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+
         const int round_imm = [&]{
             switch (rounding) {
             case FP::RoundingMode::ToNearest_TieEven:
@@ -1515,6 +1535,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             FCODE(blendvp)(src, GetVectorOf<fsize, integer_max>(code));
         }
 
+        });
+
         ctx.reg_alloc.DefineValue(inst, src);
         return;
     }
@@ -1549,7 +1571,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         mp::cartesian_product<fbits_list, rounding_list>{}
     );
 
-    EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
+    EmitTwoOpFallback<3>(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
 }
 
 void EmitX64::EmitFPVectorToSignedFixed16(EmitContext& ctx, IR::Inst* inst) {
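The backend changes end here. The remaining hunks thread the flag through the IR layer: the IREmitter helper definitions, their declarations, and the opcode table each gain a trailing U1. The repeated pattern is one extra Imm1 operand per opcode, as in the first hunk below:

    U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
        switch (esize) {
        case 32:
            return Inst<U128>(Opcode::FPVectorDiv32, a, b, Imm1(fpcr_controlled));
        case 64:
            return Inst<U128>(Opcode::FPVectorDiv64, a, b, Imm1(fpcr_controlled));
        }
        UNREACHABLE();
    }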
@@ -2318,12 +2318,12 @@ U128 IREmitter::FPVectorAdd(size_t esize, const U128& a, const U128& b, bool fpc
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b) {
+U128 IREmitter::FPVectorDiv(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
     switch (esize) {
     case 32:
-        return Inst<U128>(Opcode::FPVectorDiv32, a, b);
+        return Inst<U128>(Opcode::FPVectorDiv32, a, b, Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorDiv64, a, b);
+        return Inst<U128>(Opcode::FPVectorDiv64, a, b, Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
@@ -2340,24 +2340,24 @@ U128 IREmitter::FPVectorEqual(size_t esize, const U128& a, const U128& b, bool f
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+U128 IREmitter::FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled) {
     ASSERT(fbits <= esize);
     switch (esize) {
     case 32:
-        return Inst<U128>(Opcode::FPVectorFromSignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorFromSignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)), Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorFromSignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorFromSignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)), Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+U128 IREmitter::FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled) {
     ASSERT(fbits <= esize);
     switch (esize) {
     case 32:
-        return Inst<U128>(Opcode::FPVectorFromUnsignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorFromUnsignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)), Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorFromUnsignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+        return Inst<U128>(Opcode::FPVectorFromUnsignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)), Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
@@ -2412,24 +2412,24 @@ U128 IREmitter::FPVectorMul(size_t esize, const U128& a, const U128& b, bool fpc
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorMulAdd(size_t esize, const U128& a, const U128& b, const U128& c) {
+U128 IREmitter::FPVectorMulAdd(size_t esize, const U128& a, const U128& b, const U128& c, bool fpcr_controlled) {
     switch (esize) {
     case 16:
-        return Inst<U128>(Opcode::FPVectorMulAdd16, a, b, c);
+        return Inst<U128>(Opcode::FPVectorMulAdd16, a, b, c, Imm1(fpcr_controlled));
     case 32:
-        return Inst<U128>(Opcode::FPVectorMulAdd32, a, b, c);
+        return Inst<U128>(Opcode::FPVectorMulAdd32, a, b, c, Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorMulAdd64, a, b, c);
+        return Inst<U128>(Opcode::FPVectorMulAdd64, a, b, c, Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorMulX(size_t esize, const U128& a, const U128& b) {
+U128 IREmitter::FPVectorMulX(size_t esize, const U128& a, const U128& b, bool fpcr_controlled) {
     switch (esize) {
     case 32:
-        return Inst<U128>(Opcode::FPVectorMulX32, a, b);
+        return Inst<U128>(Opcode::FPVectorMulX32, a, b, Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorMulX64, a, b);
+        return Inst<U128>(Opcode::FPVectorMulX64, a, b, Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
@@ -2490,17 +2490,17 @@ U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128&
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
+U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact, bool fpcr_controlled) {
     const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
     const IR::U1 exact_imm = Imm1(exact);
 
     switch (esize) {
     case 16:
-        return Inst<U128>(Opcode::FPVectorRoundInt16, operand, rounding_imm, exact_imm);
+        return Inst<U128>(Opcode::FPVectorRoundInt16, operand, rounding_imm, exact_imm, Imm1(fpcr_controlled));
     case 32:
-        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, rounding_imm, exact_imm);
+        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, rounding_imm, exact_imm, Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, rounding_imm, exact_imm);
+        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, rounding_imm, exact_imm, Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
@@ -2529,12 +2529,12 @@ U128 IREmitter::FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128&
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorSqrt(size_t esize, const U128& a) {
+U128 IREmitter::FPVectorSqrt(size_t esize, const U128& a, bool fpcr_controlled) {
     switch (esize) {
     case 32:
-        return Inst<U128>(Opcode::FPVectorSqrt32, a);
+        return Inst<U128>(Opcode::FPVectorSqrt32, a, Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorSqrt64, a);
+        return Inst<U128>(Opcode::FPVectorSqrt64, a, Imm1(fpcr_controlled));
     }
     UNREACHABLE();
 }
@@ -2549,7 +2549,7 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b, bool fpc
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled) {
     ASSERT(fbits <= esize);
 
     const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
@@ -2557,17 +2557,17 @@ U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits,
 
     switch (esize) {
     case 16:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed16, a, fbits_imm, rounding_imm);
+        return Inst<U128>(Opcode::FPVectorToSignedFixed16, a, fbits_imm, rounding_imm, Imm1(fpcr_controlled));
     case 32:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, fbits_imm, rounding_imm);
+        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, fbits_imm, rounding_imm, Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, fbits_imm, rounding_imm);
+        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, fbits_imm, rounding_imm, Imm1(fpcr_controlled));
     }
 
     UNREACHABLE();
 }
 
-U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled) {
     ASSERT(fbits <= esize);
 
     const U8 fbits_imm = Imm8(static_cast<u8>(fbits));
@@ -2575,11 +2575,11 @@ U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbit
 
     switch (esize) {
     case 16:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed16, a, fbits_imm, rounding_imm);
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed16, a, fbits_imm, rounding_imm, Imm1(fpcr_controlled));
     case 32:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, fbits_imm, rounding_imm);
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, fbits_imm, rounding_imm, Imm1(fpcr_controlled));
     case 64:
-        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, fbits_imm, rounding_imm);
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, fbits_imm, rounding_imm, Imm1(fpcr_controlled));
     }
 
     UNREACHABLE();
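Because every new parameter defaults to true (see the declaration hunk below), existing translator code keeps compiling unchanged; only call sites that want the non-FPCR-controlled behaviour need to pass the flag. A hypothetical frontend call site, for illustration:

    const IR::U128 q = ir.FPVectorMulAdd(32, addend, op1, op2);         // fpcr_controlled = true
    const IR::U128 r = ir.FPVectorMulAdd(32, addend, op1, op2, false);  // opt out of FPCR control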
@@ -349,29 +349,29 @@ public:
 
     U128 FPVectorAbs(size_t esize, const U128& a);
     U128 FPVectorAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
-    U128 FPVectorDiv(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorDiv(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
-    U128 FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
-    U128 FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
+    U128 FPVectorFromSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true);
+    U128 FPVectorFromUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true);
     U128 FPVectorGreater(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorGreaterEqual(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorMax(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorMin(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorMul(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
-    U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2);
-    U128 FPVectorMulX(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2, bool fpcr_controlled = true);
+    U128 FPVectorMulX(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorNeg(size_t esize, const U128& a);
     U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
     U128 FPVectorRecipEstimate(size_t esize, const U128& a, bool fpcr_controlled = true);
     U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
-    U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact);
+    U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact, bool fpcr_controlled = true);
     U128 FPVectorRSqrtEstimate(size_t esize, const U128& a, bool fpcr_controlled = true);
     U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
-    U128 FPVectorSqrt(size_t esize, const U128& a);
+    U128 FPVectorSqrt(size_t esize, const U128& a, bool fpcr_controlled = true);
     U128 FPVectorSub(size_t esize, const U128& a, const U128& b, bool fpcr_controlled = true);
-    U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
-    U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
+    U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true);
+    U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding, bool fpcr_controlled = true);
 
     void Breakpoint();
@@ -587,15 +587,15 @@ OPCODE(FPVectorAbs32, U128, U128 )
 OPCODE(FPVectorAbs64,               U128, U128                )
 OPCODE(FPVectorAdd32,               U128, U128, U128, U1      )
 OPCODE(FPVectorAdd64,               U128, U128, U128, U1      )
-OPCODE(FPVectorDiv32,               U128, U128, U128          )
-OPCODE(FPVectorDiv64,               U128, U128, U128          )
+OPCODE(FPVectorDiv32,               U128, U128, U128, U1      )
+OPCODE(FPVectorDiv64,               U128, U128, U128, U1      )
 OPCODE(FPVectorEqual16,             U128, U128, U128, U1      )
 OPCODE(FPVectorEqual32,             U128, U128, U128, U1      )
 OPCODE(FPVectorEqual64,             U128, U128, U128, U1      )
-OPCODE(FPVectorFromSignedFixed32,   U128, U128, U8, U8        )
-OPCODE(FPVectorFromSignedFixed64,   U128, U128, U8, U8        )
-OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8        )
-OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8        )
+OPCODE(FPVectorFromSignedFixed32,   U128, U128, U8, U8, U1    )
+OPCODE(FPVectorFromSignedFixed64,   U128, U128, U8, U8, U1    )
+OPCODE(FPVectorFromUnsignedFixed32, U128, U128, U8, U8, U1    )
+OPCODE(FPVectorFromUnsignedFixed64, U128, U128, U8, U8, U1    )
 OPCODE(FPVectorGreater32,           U128, U128, U128, U1      )
 OPCODE(FPVectorGreater64,           U128, U128, U128, U1      )
 OPCODE(FPVectorGreaterEqual32,      U128, U128, U128, U1      )
@@ -606,11 +606,11 @@ OPCODE(FPVectorMin32, U128, U128, U128, U1 )
 OPCODE(FPVectorMin64,               U128, U128, U128, U1      )
 OPCODE(FPVectorMul32,               U128, U128, U128, U1      )
 OPCODE(FPVectorMul64,               U128, U128, U128, U1      )
-OPCODE(FPVectorMulAdd16,            U128, U128, U128, U128    )
-OPCODE(FPVectorMulAdd32,            U128, U128, U128, U128    )
-OPCODE(FPVectorMulAdd64,            U128, U128, U128, U128    )
-OPCODE(FPVectorMulX32,              U128, U128, U128          )
-OPCODE(FPVectorMulX64,              U128, U128, U128          )
+OPCODE(FPVectorMulAdd16,            U128, U128, U128, U128, U1)
+OPCODE(FPVectorMulAdd32,            U128, U128, U128, U128, U1)
+OPCODE(FPVectorMulAdd64,            U128, U128, U128, U128, U1)
+OPCODE(FPVectorMulX32,              U128, U128, U128, U1      )
+OPCODE(FPVectorMulX64,              U128, U128, U128, U1      )
 OPCODE(FPVectorNeg16,               U128, U128                )
 OPCODE(FPVectorNeg32,               U128, U128                )
 OPCODE(FPVectorNeg64,               U128, U128                )
@@ -624,25 +624,25 @@ OPCODE(FPVectorRecipEstimate64, U128, U128, U1 )
 OPCODE(FPVectorRecipStepFused16,    U128, U128, U128, U1      )
 OPCODE(FPVectorRecipStepFused32,    U128, U128, U128, U1      )
 OPCODE(FPVectorRecipStepFused64,    U128, U128, U128, U1      )
-OPCODE(FPVectorRoundInt16,          U128, U128, U8, U1        )
-OPCODE(FPVectorRoundInt32,          U128, U128, U8, U1        )
-OPCODE(FPVectorRoundInt64,          U128, U128, U8, U1        )
+OPCODE(FPVectorRoundInt16,          U128, U128, U8, U1, U1    )
+OPCODE(FPVectorRoundInt32,          U128, U128, U8, U1, U1    )
+OPCODE(FPVectorRoundInt64,          U128, U128, U8, U1, U1    )
 OPCODE(FPVectorRSqrtEstimate16,     U128, U128, U1            )
 OPCODE(FPVectorRSqrtEstimate32,     U128, U128, U1            )
 OPCODE(FPVectorRSqrtEstimate64,     U128, U128, U1            )
 OPCODE(FPVectorRSqrtStepFused16,    U128, U128, U128, U1      )
 OPCODE(FPVectorRSqrtStepFused32,    U128, U128, U128, U1      )
 OPCODE(FPVectorRSqrtStepFused64,    U128, U128, U128, U1      )
-OPCODE(FPVectorSqrt32,              U128, U128                )
-OPCODE(FPVectorSqrt64,              U128, U128                )
+OPCODE(FPVectorSqrt32,              U128, U128, U1            )
+OPCODE(FPVectorSqrt64,              U128, U128, U1            )
 OPCODE(FPVectorSub32,               U128, U128, U128, U1      )
 OPCODE(FPVectorSub64,               U128, U128, U128, U1      )
-OPCODE(FPVectorToSignedFixed16,     U128, U128, U8, U8        )
-OPCODE(FPVectorToSignedFixed32,     U128, U128, U8, U8        )
-OPCODE(FPVectorToSignedFixed64,     U128, U128, U8, U8        )
-OPCODE(FPVectorToUnsignedFixed16,   U128, U128, U8, U8        )
-OPCODE(FPVectorToUnsignedFixed32,   U128, U128, U8, U8        )
-OPCODE(FPVectorToUnsignedFixed64,   U128, U128, U8, U8        )
+OPCODE(FPVectorToSignedFixed16,     U128, U128, U8, U8, U1    )
+OPCODE(FPVectorToSignedFixed32,     U128, U128, U8, U8, U1    )
+OPCODE(FPVectorToSignedFixed64,     U128, U128, U8, U8, U1    )
+OPCODE(FPVectorToUnsignedFixed16,   U128, U128, U8, U8, U1    )
+OPCODE(FPVectorToUnsignedFixed32,   U128, U128, U8, U8, U1    )
+OPCODE(FPVectorToUnsignedFixed64,   U128, U128, U8, U8, U1    )
 
 // A32 Memory access
 A32OPC(ClearExclusive, Void, )
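Reading the opcode table above: the columns after the opcode name are the result type followed by the argument types, so the trailing U1 added to each of these entries is the new fpcr_controlled immediate. For instance:

    // OPCODE(FPVectorSqrt32, U128, U128, U1)
    //        name            ^ret  ^a    ^fpcr_controlled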