emit_x64_vector: Ensure FPSR.QC is set even if output is invalidated
This commit is contained in:
parent
34cb465fc7
commit
6bcc424e1a
3 changed files with 67 additions and 47 deletions
|
@ -3829,29 +3829,29 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply16(EmitContext& ctx, IR::
|
||||||
ctx.EraseInstruction(lower_inst);
|
ctx.EraseInstruction(lower_inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||||
|
code.vpsrlw(lower_tmp, lower_tmp, 15);
|
||||||
|
code.vpaddw(upper_tmp, upper_tmp, upper_tmp);
|
||||||
|
code.vpor(upper_result, upper_tmp, lower_tmp);
|
||||||
|
code.vpcmpeqw(upper_tmp, upper_result, code.XmmBConst<16>(xword, 0x8000));
|
||||||
|
code.vpxor(upper_result, upper_result, upper_tmp);
|
||||||
|
} else {
|
||||||
|
code.paddw(upper_tmp, upper_tmp);
|
||||||
|
code.psrlw(lower_tmp, 15);
|
||||||
|
code.movdqa(upper_result, upper_tmp);
|
||||||
|
code.por(upper_result, lower_tmp);
|
||||||
|
code.movdqa(upper_tmp, code.XmmBConst<16>(xword, 0x8000));
|
||||||
|
code.pcmpeqw(upper_tmp, upper_result);
|
||||||
|
code.pxor(upper_result, upper_tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
code.pmovmskb(bit, upper_tmp);
|
||||||
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
|
|
||||||
if (upper_inst) {
|
if (upper_inst) {
|
||||||
const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
|
|
||||||
|
|
||||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
|
||||||
code.vpsrlw(lower_tmp, lower_tmp, 15);
|
|
||||||
code.vpaddw(upper_tmp, upper_tmp, upper_tmp);
|
|
||||||
code.vpor(upper_result, upper_tmp, lower_tmp);
|
|
||||||
code.vpcmpeqw(upper_tmp, upper_result, code.XmmBConst<16>(xword, 0x8000));
|
|
||||||
code.vpxor(upper_result, upper_result, upper_tmp);
|
|
||||||
} else {
|
|
||||||
code.paddw(upper_tmp, upper_tmp);
|
|
||||||
code.psrlw(lower_tmp, 15);
|
|
||||||
code.movdqa(upper_result, upper_tmp);
|
|
||||||
code.por(upper_result, lower_tmp);
|
|
||||||
code.movdqa(upper_tmp, code.XmmBConst<16>(xword, 0x8000));
|
|
||||||
code.pcmpeqw(upper_tmp, upper_result);
|
|
||||||
code.pxor(upper_result, upper_tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
|
||||||
code.pmovmskb(bit, upper_tmp);
|
|
||||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
|
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
|
||||||
ctx.EraseInstruction(upper_inst);
|
ctx.EraseInstruction(upper_inst);
|
||||||
}
|
}
|
||||||
|
@ -3880,23 +3880,23 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
|
||||||
code.vpaddq(odds, odds, odds);
|
code.vpaddq(odds, odds, odds);
|
||||||
code.vpaddq(even, even, even);
|
code.vpaddq(even, even, even);
|
||||||
|
|
||||||
|
const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
code.vpsrlq(upper_result, odds, 32);
|
||||||
|
code.vblendps(upper_result, upper_result, even, 0b1010);
|
||||||
|
|
||||||
|
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
|
||||||
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
|
||||||
|
code.vpcmpeqd(mask, upper_result, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
|
code.vpxor(upper_result, upper_result, mask);
|
||||||
|
code.pmovmskb(bit, mask);
|
||||||
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
|
|
||||||
|
ctx.reg_alloc.Release(mask);
|
||||||
|
ctx.reg_alloc.Release(bit);
|
||||||
|
|
||||||
if (upper_inst) {
|
if (upper_inst) {
|
||||||
const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
|
|
||||||
|
|
||||||
code.vpsrlq(upper_result, odds, 32);
|
|
||||||
code.vblendps(upper_result, upper_result, even, 0b1010);
|
|
||||||
|
|
||||||
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
|
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
|
||||||
|
|
||||||
code.vpcmpeqd(mask, upper_result, code.XmmBConst<32>(xword, 0x80000000));
|
|
||||||
code.vpxor(upper_result, upper_result, mask);
|
|
||||||
code.pmovmskb(bit, mask);
|
|
||||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
|
||||||
|
|
||||||
ctx.reg_alloc.Release(mask);
|
|
||||||
ctx.reg_alloc.Release(bit);
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
|
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
|
||||||
ctx.EraseInstruction(upper_inst);
|
ctx.EraseInstruction(upper_inst);
|
||||||
}
|
}
|
||||||
|
@ -3955,15 +3955,15 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
|
||||||
code.por(lower_result, x);
|
code.por(lower_result, x);
|
||||||
code.psubd(upper_result, sign_correction);
|
code.psubd(upper_result, sign_correction);
|
||||||
|
|
||||||
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
|
||||||
|
code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000));
|
||||||
|
code.pcmpeqd(tmp, upper_result);
|
||||||
|
code.pxor(upper_result, tmp);
|
||||||
|
code.pmovmskb(bit, tmp);
|
||||||
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
|
|
||||||
if (upper_inst) {
|
if (upper_inst) {
|
||||||
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
|
||||||
|
|
||||||
code.movdqa(tmp, code.XmmBConst<32>(xword, 0x80000000));
|
|
||||||
code.pcmpeqd(tmp, upper_result);
|
|
||||||
code.pxor(upper_result, tmp);
|
|
||||||
code.pmovmskb(bit, tmp);
|
|
||||||
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
|
ctx.reg_alloc.DefineValue(upper_inst, upper_result);
|
||||||
ctx.EraseInstruction(upper_inst);
|
ctx.EraseInstruction(upper_inst);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1179,3 +1179,23 @@ TEST_CASE("A64: Memory access (fastmem)", "[a64]") {
|
||||||
jit.Run();
|
jit.Run();
|
||||||
REQUIRE(strncmp(backing_memory + 0x100, backing_memory + 0x1F0, 23) == 0);
|
REQUIRE(strncmp(backing_memory + 0x100, backing_memory + 0x1F0, 23) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test: runs a SQRDMULH whose destination (V11) is then written
// again by the following FMINP, and checks that the FPSR value reported
// afterwards is still 0x08000000 (per the test name, this is the QC flag).
TEST_CASE("A64: SQRDMULH QC flag when output invalidated", "[a64]") {
|
||||||
|
A64TestEnv env;
|
||||||
|
A64::Jit jit{A64::UserConfig{&env}};
|
||||||
|
|
||||||
|
// Program: three instructions; the last one branches to itself.
env.code_mem.emplace_back(0x0fbcd38b); // SQRDMULH.2S V11, V28, V28[1]
|
||||||
|
env.code_mem.emplace_back(0x7ef0f8eb); // FMINP.2D D11, V7
|
||||||
|
env.code_mem.emplace_back(0x14000000); // B .
|
||||||
|
|
||||||
|
jit.SetPC(0);
|
||||||
|
jit.SetVector(7, {0xb1b5'd0b1'4e54'e281, 0xb4cb'4fec'8563'1032});
|
||||||
|
// The upper 32 bits of the first 64-bit element are 0x80000000; presumably
// this is the lane that makes the doubling multiply saturate (TODO confirm).
jit.SetVector(28, {0x8000'0000'0000'0000, 0x0000'0000'0000'0000});
|
||||||
|
jit.SetFpcr(0x05400000);
|
||||||
|
|
||||||
|
// Tick budget equals the number of instructions placed in code_mem above.
env.ticks_left = 3;
|
||||||
|
jit.Run();
|
||||||
|
|
||||||
|
// The flag must be reported even though V11 no longer holds the SQRDMULH result.
REQUIRE(jit.GetFpsr() == 0x08000000);
|
||||||
|
// Expected V11: low element equals the second element loaded into V7, upper element is zero.
REQUIRE(jit.GetVector(11) == Vector{0xb4cb'4fec'8563'1032, 0x0000'0000'0000'0000});
|
||||||
|
}
|
||||||
|
|
|
@ -211,7 +211,7 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv
|
||||||
fmt::print("{:3s}: {:016x}\n", A64::RegToString(static_cast<A64::Reg>(i)), regs[i]);
|
fmt::print("{:3s}: {:016x}\n", A64::RegToString(static_cast<A64::Reg>(i)), regs[i]);
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < vecs.size(); ++i) {
|
for (size_t i = 0; i < vecs.size(); ++i) {
|
||||||
fmt::print("{:3s}: {}{}\n", A64::VecToString(static_cast<A64::Vec>(i)), vecs[i][1], vecs[i][0]);
|
fmt::print("{:3s}: {:016x}{:016x}\n", A64::VecToString(static_cast<A64::Vec>(i)), vecs[i][1], vecs[i][0]);
|
||||||
}
|
}
|
||||||
fmt::print("sp : {:016x}\n", initial_sp);
|
fmt::print("sp : {:016x}\n", initial_sp);
|
||||||
fmt::print("pc : {:016x}\n", instructions_start);
|
fmt::print("pc : {:016x}\n", instructions_start);
|
||||||
|
|
Loading…
Reference in a new issue