ir: Add opcode to perform the vector conversion S64->F64
Unfortunately, x86 prior to AVX-512 doesn't really give us any convenient instruction to do this work for us.
This commit is contained in:
parent
0e61ee6bf6
commit
3f6c529da2
4 changed files with 47 additions and 0 deletions
|
@ -206,6 +206,47 @@ void EmitX64::EmitFPVectorS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.DefineValue(inst, xmm);
|
||||
}
|
||||
|
||||
// Emits host code for the FPVectorS64ToDouble IR instruction: both signed
// 64-bit lanes of the 128-bit argument are converted to doubles in place.
//
// One of three code paths is chosen from the host CPU's feature set:
//   * AVX-512VL + AVX-512DQ: a single vcvtqq2pd converts both lanes.
//   * SSE4.1: each lane is converted through a GPR with cvtsi2sd, using pextrq
//     to read the upper lane.
//   * Otherwise: the same scalar approach, with movhlps standing in for pextrq.
void EmitX64::EmitFPVectorS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
    using Cpu = Xbyak::util::Cpu;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

    const bool has_packed_conversion = code.DoesCpuSupport(Cpu::tAVX512VL)
                                    && code.DoesCpuSupport(Cpu::tAVX512DQ);
    if (has_packed_conversion) {
        code.vcvtqq2pd(result, result);
        ctx.reg_alloc.DefineValue(inst, result);
        return;
    }

    if (code.DoesCpuSupport(Cpu::tSSE41)) {
        const Xbyak::Xmm upper_double = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Reg64 lane = ctx.reg_alloc.ScratchGpr();

        // Lower lane: cvtsi2sd only writes the low quadword of its destination,
        // so the upper integer lane of `result` is still available afterwards.
        code.movq(lane, result);
        code.cvtsi2sd(result, lane);

        // Upper lane: read it straight out of `result` and convert it into the
        // low quadword of the temporary.
        code.pextrq(lane, result, 1);
        code.cvtsi2sd(upper_double, lane);

        // Interleave the two low quadwords: result = { lower, upper }.
        code.unpcklpd(result, upper_double);
    } else {
        const Xbyak::Xmm upper_source = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm upper_double = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Reg64 lane = ctx.reg_alloc.ScratchGpr();

        // Without pextrq, first move the upper lane down into the low quadword
        // of a spare register, then convert the lower lane.
        code.movhlps(upper_source, result);
        code.movq(lane, result);
        code.cvtsi2sd(result, lane);

        // Upper lane, taken from the copy made above.
        code.movq(lane, upper_source);
        code.cvtsi2sd(upper_double, lane);

        // Interleave the two low quadwords: result = { lower, upper }.
        code.unpcklpd(result, upper_double);
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||
|
||||
// Emits host code for the FPVectorSub32 IR instruction by handing the subps
// instruction emitter to the shared EmitVectorOperation32 helper.
void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
}
|
||||
|
|
|
@ -1488,6 +1488,10 @@ U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
|
|||
return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
|
||||
}
|
||||
|
||||
// Appends an FPVectorS64ToDouble instruction that takes `a` as its only
// operand and returns the instruction's U128 result.
U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
    const Opcode conversion = Opcode::FPVectorS64ToDouble;
    return Inst<U128>(conversion, a);
}
|
||||
|
||||
// Appends a Breakpoint instruction; it takes no operands and its result is
// not used.
void IREmitter::Breakpoint() {
    Inst(Opcode::Breakpoint);
}
|
||||
|
|
|
@ -274,6 +274,7 @@ public:
|
|||
U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
||||
U128 FPVectorS32ToSingle(const U128& a);
|
||||
// Vector conversion S64 -> F64: emits the FPVectorS64ToDouble opcode on `a`.
U128 FPVectorS64ToDouble(const U128& a);
|
||||
|
||||
void Breakpoint();
|
||||
|
||||
|
|
|
@ -392,6 +392,7 @@ OPCODE(FPVectorDiv64, T::U128, T::U128, T::U
|
|||
OPCODE(FPVectorMul32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorMul64, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorS32ToSingle, T::U128, T::U128 )
|
||||
// Vector S64 -> F64 conversion. NOTE(review): by analogy with the neighbouring
// entries the first type is presumably the result and the rest the operands —
// confirm against the OPCODE macro definition.
OPCODE(FPVectorS64ToDouble, T::U128, T::U128 )
|
||||
OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )
|
||||
|
||||
|
|
Loading…
Reference in a new issue