ir: Add opcode to perform the vector conversion S64->F64

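Unfortunately, x86 prior to AVX-512 doesn't give us a convenient instruction for this conversion, so on older CPUs each 64-bit lane is converted individually with scalar instructions and the two results are recombined.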
This commit is contained in:
Lioncash 2018-05-08 11:18:13 -04:00 committed by MerryMage
parent 0e61ee6bf6
commit 3f6c529da2
4 changed files with 47 additions and 0 deletions

View file

@ -206,6 +206,47 @@ void EmitX64::EmitFPVectorS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, xmm);
}
// Emits host code that converts the two signed 64-bit integer lanes of the
// 128-bit operand into two doubles, leaving the result in the operand's
// (scratch) XMM register.
//
// Three strategies are used, chosen by host CPU features:
//   1. AVX-512VL + AVX-512DQ: one packed vcvtqq2pd.
//   2. SSE4.1: scalar cvtsi2sd per lane, upper lane fetched with pextrq.
//   3. Otherwise: scalar cvtsi2sd per lane, upper lane fetched with movhlps.
void EmitX64::EmitFPVectorS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

    const bool has_packed_convert = code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)
                                 && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ);

    if (has_packed_convert) {
        // Both lanes are converted by a single instruction.
        code.vcvtqq2pd(result, result);
    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        const Xbyak::Xmm upper_double = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Reg64 lane = ctx.reg_alloc.ScratchGpr();

        // Lower lane: integer -> GPR -> double, written back over the lower lane.
        code.movq(lane, result);
        code.cvtsi2sd(result, lane);

        // Upper lane: this relies on the cvtsi2sd above having left the upper
        // 64 bits of `result` untouched, so the original integer is still there.
        code.pextrq(lane, result, 1);
        code.cvtsi2sd(upper_double, lane);

        // Interleave the two converted doubles into { lower, upper }.
        code.unpcklpd(result, upper_double);
    } else {
        const Xbyak::Xmm upper_lane = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm upper_double = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Reg64 lane = ctx.reg_alloc.ScratchGpr();

        // Copy the upper lane down into the low half of a spare register,
        // then convert the lower lane in place.
        code.movhlps(upper_lane, result);
        code.movq(lane, result);
        code.cvtsi2sd(result, lane);

        // Convert the saved upper lane.
        code.movq(lane, upper_lane);
        code.cvtsi2sd(upper_double, lane);

        // Interleave the two converted doubles into { lower, upper }.
        code.unpcklpd(result, upper_double);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Emits the FPVectorSub32 operation by handing the subps emitter to the shared
// EmitVectorOperation32 helper, which does the actual code generation.
void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
}

View file

@ -1488,6 +1488,10 @@ U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
}
// Creates an FPVectorS64ToDouble IR instruction (vector S64 -> F64 conversion)
// with `a` as its only operand and returns the instruction's U128 result.
U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
return Inst<U128>(Opcode::FPVectorS64ToDouble, a);
}
// Creates a Breakpoint IR instruction. It takes no operands and its result is
// discarded.
void IREmitter::Breakpoint() {
Inst(Opcode::Breakpoint);
}

View file

@ -274,6 +274,7 @@ public:
// Element-wise vector multiply / subtract; `esize` selects the element width.
// NOTE(review): presumably esize is in bits (32 or 64, matching the
// FPVectorMul32/64 and FPVectorSub32/64 opcodes) - confirm against the definitions.
U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
// Vector integer-to-floating-point conversions: signed 32-bit lanes to single
// precision, and signed 64-bit lanes to double precision.
U128 FPVectorS32ToSingle(const U128& a);
U128 FPVectorS64ToDouble(const U128& a);
// Creates a Breakpoint IR instruction.
void Breakpoint();

View file

@ -392,6 +392,7 @@ OPCODE(FPVectorDiv64, T::U128, T::U128, T::U
OPCODE(FPVectorMul32, T::U128, T::U128, T::U128 )
OPCODE(FPVectorMul64, T::U128, T::U128, T::U128 )
// Vector signed-integer -> floating-point conversions (one U128 operand, U128 result).
OPCODE(FPVectorS32ToSingle, T::U128, T::U128 )
OPCODE(FPVectorS64ToDouble, T::U128, T::U128 )
OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )
OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )