ir: Add opcode to perform the vector conversion S64->F64
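Unfortunately, x86 prior to AVX-512 doesn't give us a convenient instruction to do this conversion, so the fallback paths convert each quadword individually with cvtsi2sd and then recombine the two results.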
This commit is contained in:
parent
0e61ee6bf6
commit
3f6c529da2
4 changed files with 47 additions and 0 deletions
|
@ -206,6 +206,47 @@ void EmitX64::EmitFPVectorS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, xmm);
|
ctx.reg_alloc.DefineValue(inst, xmm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits host code that converts the two signed 64-bit lanes of a vector to doubles.
//
// One of three instruction sequences is chosen from what the host CPU reports:
//   - AVX-512VL + AVX-512DQ: a single vcvtqq2pd.
//   - SSE4.1: per-lane cvtsi2sd, fetching the upper quadword with pextrq.
//   - otherwise: per-lane cvtsi2sd, fetching the upper quadword with movhlps + movq.
void EmitX64::EmitFPVectorS64ToDouble(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm value = ctx.reg_alloc.UseScratchXmm(operands[0]);

    // Converts the lower quadword of `value` in place, going through `gpr`.
    const auto convert_lower = [&](const Xbyak::Reg64& gpr) {
        code.movq(gpr, value);
        code.cvtsi2sd(value, gpr);
    };

    // Converts the integer held in `gpr` into `converted`, then merges it into
    // `value` with unpcklpd.
    const auto convert_upper_and_merge = [&](const Xbyak::Xmm& converted, const Xbyak::Reg64& gpr) {
        code.cvtsi2sd(converted, gpr);
        code.unpcklpd(value, converted);
    };

    const bool has_packed_conversion = code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)
                                    && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512DQ);

    if (has_packed_conversion) {
        code.vcvtqq2pd(value, value);
    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Reg64 gpr = ctx.reg_alloc.ScratchGpr();

        convert_lower(gpr);

        // The upper quadword is read after the lower conversion; this relies on
        // cvtsi2sd leaving the upper half of its destination untouched.
        code.pextrq(gpr, value, 1);
        convert_upper_and_merge(upper_result, gpr);
    } else {
        const Xbyak::Xmm upper_source = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Reg64 gpr = ctx.reg_alloc.ScratchGpr();

        // Without pextrq, set the upper quadword aside before converting the lower one.
        code.movhlps(upper_source, value);
        convert_lower(gpr);

        code.movq(gpr, upper_source);
        convert_upper_and_merge(upper_result, gpr);
    }

    ctx.reg_alloc.DefineValue(inst, value);
}
|
||||||
|
|
||||||
// Emits the 32-bit vector floating-point subtract by forwarding to
// EmitVectorOperation32 with Xbyak's subps as the instruction to generate.
void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
|
EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1488,6 +1488,10 @@ U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
|
||||||
return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
|
return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an FPVectorS64ToDouble IR instruction with `a` as its only operand
// and returns the resulting U128 value.
U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
    const auto opcode = Opcode::FPVectorS64ToDouble;
    return Inst<U128>(opcode, a);
}
|
||||||
|
|
||||||
// Builds a Breakpoint IR instruction; it takes no operands and its result is discarded.
void IREmitter::Breakpoint() {
|
void IREmitter::Breakpoint() {
|
||||||
Inst(Opcode::Breakpoint);
|
Inst(Opcode::Breakpoint);
|
||||||
}
|
}
|
||||||
|
|
|
@ -274,6 +274,7 @@ public:
|
||||||
U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorMul(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
U128 FPVectorS32ToSingle(const U128& a);
|
U128 FPVectorS32ToSingle(const U128& a);
|
||||||
|
U128 FPVectorS64ToDouble(const U128& a);
|
||||||
|
|
||||||
void Breakpoint();
|
void Breakpoint();
|
||||||
|
|
||||||
|
|
|
@ -392,6 +392,7 @@ OPCODE(FPVectorDiv64, T::U128, T::U128, T::U
|
||||||
OPCODE(FPVectorMul32, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorMul32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorMul64, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorMul64, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorS32ToSingle, T::U128, T::U128 )
|
OPCODE(FPVectorS32ToSingle, T::U128, T::U128 )
|
||||||
|
OPCODE(FPVectorS64ToDouble, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )
|
||||||
OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )
|
OPCODE(FPVectorSub64, T::U128, T::U128, T::U128 )
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue