backend_x64: Use a reference to BlockOfCode instead of a pointer
parent 8931ee346b
commit 68f46c8334
27 changed files with 1331 additions and 1331 deletions
(One file diff suppressed because it is too large.)
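The change is mechanical but touches the whole backend: every interface that took a `BlockOfCode*` (or `Xbyak::CodeGenerator*`) now takes a reference, pointer members become reference members, `code->...` becomes `code...`, and constructors pass `block_of_code` / `*this` instead of `&block_of_code` / `this`. A minimal runnable sketch of the pattern, using stand-in types rather than the real dynarmic declarations:

#include <cstdio>

// Stand-in for the real BlockOfCode (in dynarmic it derives from Xbyak::CodeGenerator).
struct BlockOfCode {
    void int3() { std::puts("emit int3"); }
};

// Before this commit the emitter held `BlockOfCode* code;` and call sites wrote
// `code->int3();`. After it, a reference member: bound once in the initializer
// list, never null, never reseated, which matches how the JIT actually uses it.
struct EmitX64 {
    explicit EmitX64(BlockOfCode& code) : code(code) {}
    void EmitBreakpoint() { code.int3(); }  // dot syntax everywhere
    BlockOfCode& code;
};

int main() {
    BlockOfCode block;
    EmitX64 emitter{block};  // pass the object itself, not `&block`
    emitter.EmitBreakpoint();
}

The reference makes the lifetime invariant explicit: an emitter always has exactly one BlockOfCode for its entire lifetime, so the nullable pointer bought nothing.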
@@ -30,7 +30,7 @@ struct A32EmitContext final : public EmitContext {
 
 class A32EmitX64 final : public EmitX64 {
 public:
-    A32EmitX64(BlockOfCode* code, A32::UserConfig config, A32::Jit* jit_interface);
+    A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
     ~A32EmitX64() override;
 
     /**
@@ -46,7 +46,7 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo
 struct Jit::Impl {
     Impl(Jit* jit, A32::UserConfig config)
         : block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state})
-        , emitter(&block_of_code, config, jit)
+        , emitter(block_of_code, config, jit)
         , config(config)
         , jit_interface(jit)
     {}
@@ -52,17 +52,17 @@ bool A64EmitContext::FPSCR_DN() const {
     return Location().FPCR().DN();
 }
 
-A64EmitX64::A64EmitX64(BlockOfCode* code, A64::UserConfig conf)
+A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf)
     : EmitX64(code), conf(conf)
 {
-    code->PreludeComplete();
+    code.PreludeComplete();
 }
 
 A64EmitX64::~A64EmitX64() = default;
 
 A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
-    code->align();
-    const u8* const entrypoint = code->getCurr();
+    code.align();
+    const u8* const entrypoint = code.getCurr();
 
     // Start emitting.
     EmitCondPrelude(block);
@@ -102,12 +102,12 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
 
     EmitAddCycles(block.CycleCount());
     EmitX64::EmitTerminal(block.GetTerminal(), block.Location());
-    code->int3();
+    code.int3();
 
     const A64::LocationDescriptor descriptor{block.Location()};
     Patch(descriptor, entrypoint);
 
-    const size_t size = static_cast<size_t>(code->getCurr() - entrypoint);
+    const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
     const A64::LocationDescriptor end_location{block.EndLocation()};
     const auto range = boost::icl::discrete_interval<u64>::closed(descriptor.PC(), end_location.PC() - 1);
     A64EmitX64::BlockDescriptor block_desc{entrypoint, size};
@@ -129,32 +129,32 @@ void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u64>& rang
 void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
-    code->mov(code->byte[r15 + offsetof(A64JitState, check_bit)], to_store);
+    code.mov(code.byte[r15 + offsetof(A64JitState, check_bit)], to_store);
 }
 
 void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code->mov(result, dword[r15 + offsetof(A64JitState, CPSR_nzcv)]);
-    code->shr(result, 29);
-    code->and_(result, 1);
+    code.mov(result, dword[r15 + offsetof(A64JitState, CPSR_nzcv)]);
+    code.shr(result, 29);
+    code.and_(result, 1);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
 void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    code->and_(to_store, 0b11000001'00000001);
-    code->imul(to_store, to_store, 0b00010000'00100001);
-    code->shl(to_store, 16);
-    code->and_(to_store, 0xF0000000);
-    code->mov(dword[r15 + offsetof(A64JitState, CPSR_nzcv)], to_store);
+    code.and_(to_store, 0b11000001'00000001);
+    code.imul(to_store, to_store, 0b00010000'00100001);
+    code.shl(to_store, 16);
+    code.and_(to_store, 0xF0000000);
+    code.mov(dword[r15 + offsetof(A64JitState, CPSR_nzcv)], to_store);
 }
 
 void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
     A64::Reg reg = inst->GetArg(0).GetA64RegRef();
 
     Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code->mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
+    code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
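A note on the `EmitA64SetNZCV` sequence above, since the magic numbers are easy to misread: the mask keeps bits 15, 14, 8 and 0, which appears to be the x86 `lahf`/`seto` flag layout (an inference from the mask, not something stated in this diff), and the multiply is the classic gather-scattered-bits trick: each kept bit has exactly one partial product landing in bits 15..12, so after `shl 16` and the final mask, N, Z, C and V end up in bits 31..28. A compile-time check of the same arithmetic:

#include <cstdint>

// Reproduces the and/imul/shl/and sequence from EmitA64SetNZCV in plain C++.
// Assumed input layout (inferred from the mask): N = bit 15, Z = bit 14,
// C = bit 8, V = bit 0. Output: NZCV packed into bits 31..28.
constexpr std::uint32_t PackNZCV(std::uint32_t x) {
    x &= 0b11000001'00000001;   // keep bits 15, 14, 8, 0
    x *= 0b00010000'00100001;   // bit 15*1 -> 15, 14*1 -> 14, 8*32 -> 13, 0*4096 -> 12
    x <<= 16;                   // move bits 15..12 up to 31..28
    return x & 0xF0000000;      // discard every other partial product
}

static_assert(PackNZCV(1u << 15) == 1u << 31, "N");
static_assert(PackNZCV(1u << 14) == 1u << 30, "Z");
static_assert(PackNZCV(1u << 8) == 1u << 29, "C");
static_assert(PackNZCV(1u << 0) == 1u << 28, "V");

The partial products that collide (two of them land on bit 20) only carry upward, never back into bits 15..12, which is why the trick is safe.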
@@ -162,7 +162,7 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
     A64::Reg reg = inst->GetArg(0).GetA64RegRef();
 
     Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
-    code->mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
+    code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -171,7 +171,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
     auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    code->movd(result, addr);
+    code.movd(result, addr);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -180,7 +180,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
     auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    code->movq(result, addr);
+    code.movq(result, addr);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -189,13 +189,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
     auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    code->movaps(result, addr);
+    code.movaps(result, addr);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
 void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
-    code->mov(result, qword[r15 + offsetof(A64JitState, sp)]);
+    code.mov(result, qword[r15 + offsetof(A64JitState, sp)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -204,12 +204,12 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
     A64::Reg reg = inst->GetArg(0).GetA64RegRef();
     auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
     if (args[1].FitsInImmediateS32()) {
-        code->mov(addr, args[1].GetImmediateS32());
+        code.mov(addr, args[1].GetImmediateS32());
     } else {
         // TODO: zext tracking, xmm variant
         Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(args[1]);
-        code->mov(to_store.cvt32(), to_store.cvt32());
-        code->mov(addr, to_store);
+        code.mov(to_store.cvt32(), to_store.cvt32());
+        code.mov(addr, to_store);
     }
 }
 
@@ -218,13 +218,13 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
     A64::Reg reg = inst->GetArg(0).GetA64RegRef();
     auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
     if (args[1].FitsInImmediateS32()) {
-        code->mov(addr, args[1].GetImmediateS32());
+        code.mov(addr, args[1].GetImmediateS32());
     } else if (args[1].IsInXmm()) {
         Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
-        code->movq(addr, to_store);
+        code.movq(addr, to_store);
     } else {
         Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
-        code->mov(addr, to_store);
+        code.mov(addr, to_store);
     }
 }
 
@@ -236,9 +236,9 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
     Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
     // TODO: Optimize
-    code->pxor(tmp, tmp);
-    code->movss(tmp, to_store);
-    code->movaps(addr, tmp);
+    code.pxor(tmp, tmp);
+    code.movss(tmp, to_store);
+    code.movaps(addr, tmp);
 }
 
 void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
@@ -247,8 +247,8 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
     auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
-    code->movq(to_store, to_store); // TODO: Remove when able
-    code->movaps(addr, to_store);
+    code.movq(to_store, to_store); // TODO: Remove when able
+    code.movaps(addr, to_store);
 }
 
 void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
@@ -257,20 +257,20 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
     auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
 
     Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
-    code->movaps(addr, to_store);
+    code.movaps(addr, to_store);
 }
 
 void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto addr = qword[r15 + offsetof(A64JitState, sp)];
     if (args[0].FitsInImmediateS32()) {
-        code->mov(addr, args[0].GetImmediateS32());
+        code.mov(addr, args[0].GetImmediateS32());
     } else if (args[0].IsInXmm()) {
         Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
-        code->movq(addr, to_store);
+        code.movq(addr, to_store);
     } else {
         Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
-        code->mov(addr, to_store);
+        code.mov(addr, to_store);
     }
 }
 
@@ -278,13 +278,13 @@ void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto addr = qword[r15 + offsetof(A64JitState, pc)];
     if (args[0].FitsInImmediateS32()) {
-        code->mov(addr, args[0].GetImmediateS32());
+        code.mov(addr, args[0].GetImmediateS32());
     } else if (args[0].IsInXmm()) {
         Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
-        code->movq(addr, to_store);
+        code.movq(addr, to_store);
     } else {
         Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
-        code->mov(addr, to_store);
+        code.mov(addr, to_store);
     }
 }
 
@@ -294,7 +294,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
     ASSERT(args[0].IsImmediate());
     u32 imm = args[0].GetImmediateU32();
     DEVIRT(conf.callbacks, &A64::UserCallbacks::CallSVC).EmitCall(code, [&](Xbyak::Reg64 param1) {
-        code->mov(param1.cvt32(), imm);
+        code.mov(param1.cvt32(), imm);
     });
 }
 
@@ -305,14 +305,14 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
     u64 pc = args[0].GetImmediateU64();
     u64 exception = args[1].GetImmediateU64();
     DEVIRT(conf.callbacks, &A64::UserCallbacks::ExceptionRaised).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) {
-        code->mov(param1, pc);
-        code->mov(param2, exception);
+        code.mov(param1, pc);
+        code.mov(param2, exception);
     });
 }
 
 void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
-        ASSERT(vaddr == code->ABI_PARAM2);
+        ASSERT(vaddr == code.ABI_PARAM2);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(inst, {}, args[0]);
     });
@@ -320,7 +320,7 @@ void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
-        ASSERT(vaddr == code->ABI_PARAM2);
+        ASSERT(vaddr == code.ABI_PARAM2);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(inst, {}, args[0]);
     });
@@ -328,7 +328,7 @@ void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
-        ASSERT(vaddr == code->ABI_PARAM2);
+        ASSERT(vaddr == code.ABI_PARAM2);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(inst, {}, args[0]);
     });
@@ -336,7 +336,7 @@ void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
-        ASSERT(vaddr == code->ABI_PARAM2);
+        ASSERT(vaddr == code.ABI_PARAM2);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(inst, {}, args[0]);
     });
@@ -348,33 +348,33 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
 
     static_assert(ABI_SHADOW_SPACE >= 16);
     ctx.reg_alloc.HostCall(nullptr, {}, {}, args[0]);
-    code->lea(code->ABI_PARAM2, ptr[rsp]);
-    code->sub(rsp, ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM2, ptr[rsp]);
+    code.sub(rsp, ABI_SHADOW_SPACE);
 
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 return_value, Xbyak::Reg64 vaddr) {
-        ASSERT(return_value == code->ABI_PARAM2 && vaddr == code->ABI_PARAM3);
+        ASSERT(return_value == code.ABI_PARAM2 && vaddr == code.ABI_PARAM3);
     });
 
     Xbyak::Xmm result = xmm0;
-    code->movups(result, xword[code->ABI_RETURN]);
-    code->add(rsp, ABI_SHADOW_SPACE);
+    code.movups(result, xword[code.ABI_RETURN]);
+    code.add(rsp, ABI_SHADOW_SPACE);
 
     ctx.reg_alloc.DefineValue(inst, result);
 #else
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead128).EmitCall(code, [&](Xbyak::Reg64 vaddr) {
-        ASSERT(vaddr == code->ABI_PARAM2);
+        ASSERT(vaddr == code.ABI_PARAM2);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
     });
     Xbyak::Xmm result = xmm0;
-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
-        code->movq(result, code->ABI_RETURN);
-        code->pinsrq(result, code->ABI_RETURN2, 1);
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.movq(result, code.ABI_RETURN);
+        code.pinsrq(result, code.ABI_RETURN2, 1);
     } else {
         Xbyak::Xmm tmp = xmm1;
-        code->movq(result, code->ABI_RETURN);
-        code->movq(tmp, code->ABI_RETURN2);
-        code->punpcklqdq(result, tmp);
+        code.movq(result, code.ABI_RETURN);
+        code.movq(tmp, code.ABI_RETURN2);
+        code.punpcklqdq(result, tmp);
     }
     ctx.reg_alloc.DefineValue(inst, result);
 #endif
@@ -382,7 +382,7 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
-        ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
+        ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     });
@@ -390,7 +390,7 @@ void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
-        ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
+        ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     });
@@ -398,7 +398,7 @@ void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
-        ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
+        ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     });
@@ -406,7 +406,7 @@ void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
 
 void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value) {
-        ASSERT(vaddr == code->ABI_PARAM2 && value == code->ABI_PARAM3);
+        ASSERT(vaddr == code.ABI_PARAM2 && value == code.ABI_PARAM3);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     });
@@ -421,31 +421,31 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]);
     ctx.reg_alloc.EndOfAllocScope();
     ctx.reg_alloc.HostCall(nullptr);
-    code->lea(code->ABI_PARAM3, ptr[rsp]);
-    code->sub(rsp, ABI_SHADOW_SPACE);
-    code->movaps(xword[code->ABI_PARAM3], xmm_value);
+    code.lea(code.ABI_PARAM3, ptr[rsp]);
+    code.sub(rsp, ABI_SHADOW_SPACE);
+    code.movaps(xword[code.ABI_PARAM3], xmm_value);
 
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value_ptr) {
-        ASSERT(vaddr == code->ABI_PARAM2 && value_ptr == code->ABI_PARAM3);
+        ASSERT(vaddr == code.ABI_PARAM2 && value_ptr == code.ABI_PARAM3);
     });
 
-    code->add(rsp, ABI_SHADOW_SPACE);
+    code.add(rsp, ABI_SHADOW_SPACE);
 #else
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite128).EmitCall(code, [&](Xbyak::Reg64 vaddr, Xbyak::Reg64 value0, Xbyak::Reg64 value1) {
-        ASSERT(vaddr == code->ABI_PARAM2 && value0 == code->ABI_PARAM3 && value1 == code->ABI_PARAM4);
+        ASSERT(vaddr == code.ABI_PARAM2 && value0 == code.ABI_PARAM3 && value1 == code.ABI_PARAM4);
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         ctx.reg_alloc.Use(args[0], ABI_PARAM2);
         ctx.reg_alloc.ScratchGpr({ABI_PARAM3});
         ctx.reg_alloc.ScratchGpr({ABI_PARAM4});
-        if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
             Xbyak::Xmm xmm_value = ctx.reg_alloc.UseXmm(args[1]);
-            code->movq(code->ABI_PARAM3, xmm_value);
-            code->pextrq(code->ABI_PARAM4, xmm_value, 1);
+            code.movq(code.ABI_PARAM3, xmm_value);
+            code.pextrq(code.ABI_PARAM4, xmm_value, 1);
         } else {
             Xbyak::Xmm xmm_value = ctx.reg_alloc.UseScratchXmm(args[1]);
-            code->movq(code->ABI_PARAM3, xmm_value);
-            code->punpckhqdq(xmm_value, xmm_value);
-            code->movq(code->ABI_PARAM4, xmm_value);
+            code.movq(code.ABI_PARAM3, xmm_value);
+            code.punpckhqdq(xmm_value, xmm_value);
+            code.movq(code.ABI_PARAM4, xmm_value);
        }
         ctx.reg_alloc.EndOfAllocScope();
         ctx.reg_alloc.HostCall(nullptr);
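For readers unfamiliar with the two code paths in `EmitA64WriteMemory128`: the first branch appears to correspond to ABIs that pass a 128-bit argument indirectly through memory (hence the `ABI_SHADOW_SPACE` shuffle), while the `#else` branch splits the value into two GPR arguments. With SSE4.1 that split is `movq` + `pextrq`; without it, the high half is brought down with `punpckhqdq` first, which clobbers the source register (which is why that branch takes `UseScratchXmm`). A rough intrinsics rendering of the same split, illustrative only:

#include <cstdint>
#include <emmintrin.h>  // SSE2: _mm_cvtsi128_si64, _mm_unpackhi_epi64
#include <smmintrin.h>  // SSE4.1: _mm_extract_epi64

// Splits a 128-bit value into two 64-bit halves, mirroring the two emitted
// sequences above. Note: this sketch itself must be compiled with SSE4.1
// enabled for the pextrq path; the JIT instead checks at runtime via
// DoesCpuSupport and emits only the supported sequence.
void Split128(__m128i value, std::uint64_t& lo, std::uint64_t& hi, bool has_sse41) {
    lo = static_cast<std::uint64_t>(_mm_cvtsi128_si64(value));        // movq
    if (has_sse41) {
        hi = static_cast<std::uint64_t>(_mm_extract_epi64(value, 1)); // pextrq
    } else {
        // punpckhqdq: duplicate the high qword into the low position. The
        // emitted version reuses (and therefore clobbers) the source register.
        __m128i upper = _mm_unpackhi_epi64(value, value);
        hi = static_cast<std::uint64_t>(_mm_cvtsi128_si64(upper));    // movq
    }
}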
@@ -454,35 +454,35 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {
-    code->SwitchMxcsrOnExit();
+    code.SwitchMxcsrOnExit();
     DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) {
-        code->mov(param1, A64::LocationDescriptor{terminal.next}.PC());
-        code->mov(qword[r15 + offsetof(A64JitState, pc)], param1);
-        code->mov(param2.cvt32(), terminal.num_instructions);
+        code.mov(param1, A64::LocationDescriptor{terminal.next}.PC());
+        code.mov(qword[r15 + offsetof(A64JitState, pc)], param1);
+        code.mov(param2.cvt32(), terminal.num_instructions);
     });
-    code->ReturnFromRunCode(true); // TODO: Check cycles
+    code.ReturnFromRunCode(true); // TODO: Check cycles
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescriptor) {
-    code->ReturnFromRunCode();
+    code.ReturnFromRunCode();
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor) {
-    code->cmp(qword[r15 + offsetof(A64JitState, cycles_remaining)], 0);
+    code.cmp(qword[r15 + offsetof(A64JitState, cycles_remaining)], 0);
 
-    patch_information[terminal.next].jg.emplace_back(code->getCurr());
+    patch_information[terminal.next].jg.emplace_back(code.getCurr());
     if (auto next_bb = GetBasicBlock(terminal.next)) {
         EmitPatchJg(terminal.next, next_bb->entrypoint);
     } else {
         EmitPatchJg(terminal.next);
     }
-    code->mov(rax, A64::LocationDescriptor{terminal.next}.PC());
-    code->mov(qword[r15 + offsetof(A64JitState, pc)], rax);
-    code->ForceReturnFromRunCode();
+    code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
+    code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+    code.ForceReturnFromRunCode();
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor) {
-    patch_information[terminal.next].jmp.emplace_back(code->getCurr());
+    patch_information[terminal.next].jmp.emplace_back(code.getCurr());
     if (auto next_bb = GetBasicBlock(terminal.next)) {
         EmitPatchJmp(terminal.next, next_bb->entrypoint);
     } else {
@@ -493,20 +493,20 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
 void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) {
     // This calculation has to match up with A64::LocationDescriptor::UniqueHash
     // TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
-    code->mov(rcx, qword[r15 + offsetof(A64JitState, pc)]);
-    code->mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
-    code->and_(ebx, A64::LocationDescriptor::FPCR_MASK);
-    code->shl(ebx, 37);
-    code->or_(rbx, rcx);
+    code.mov(rcx, qword[r15 + offsetof(A64JitState, pc)]);
+    code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
+    code.and_(ebx, A64::LocationDescriptor::FPCR_MASK);
+    code.shl(ebx, 37);
+    code.or_(rbx, rcx);
 
-    code->mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
-    code->sub(eax, 1);
-    code->and_(eax, u32(A64JitState::RSBPtrMask));
-    code->mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
-    code->cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
-    code->jne(code->GetReturnFromRunCodeAddress());
-    code->mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
-    code->jmp(rax);
+    code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
+    code.sub(eax, 1);
+    code.and_(eax, u32(A64JitState::RSBPtrMask));
+    code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
+    code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
+    code.jne(code.GetReturnFromRunCodeAddress());
+    code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
+    code.jmp(rax);
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) {
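The `PopRSBHint` terminal above is the return-stack-buffer fast path: it recomputes the location-descriptor hash from the live `pc` and `fpcr` (the comment notes this must stay in sync with `A64::LocationDescriptor::UniqueHash`), steps `rsb_ptr` backwards, and either jumps straight to the cached host code pointer or falls back to the dispatcher. In rough C++, where the field widths and mask value are illustrative assumptions rather than the real `A64JitState` definition:

#include <cstdint>

// Illustrative model of the RSB lookup emitted by PopRSBHint.
struct RSBSketch {
    std::uint64_t pc;
    std::uint32_t fpcr;
    std::uint32_t rsb_ptr;
    static constexpr std::uint32_t kPtrMask = 7;  // stand-in for A64JitState::RSBPtrMask
    std::uint64_t rsb_location_descriptors[8];
    const void* rsb_codeptrs[8];
};

// Returns the cached entry point on a hit; nullptr means "return to dispatcher"
// (the emitted code jumps to GetReturnFromRunCodeAddress() in that case).
inline const void* PopRSBHint(RSBSketch& s, std::uint64_t fpcr_mask) {
    const std::uint64_t hash =
        s.pc | (static_cast<std::uint64_t>(s.fpcr & fpcr_mask) << 37);  // must match UniqueHash
    s.rsb_ptr = (s.rsb_ptr - 1) & RSBSketch::kPtrMask;
    if (s.rsb_location_descriptors[s.rsb_ptr] != hash)
        return nullptr;
    return s.rsb_codeptrs[s.rsb_ptr];
}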
@@ -518,7 +518,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor
     default:
         Xbyak::Label pass = EmitCond(terminal.if_);
         EmitTerminal(terminal.else_, initial_location);
-        code->L(pass);
+        code.L(pass);
         EmitTerminal(terminal.then_, initial_location);
         break;
     }
@@ -526,50 +526,50 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor
 void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) {
     Xbyak::Label fail;
-    code->cmp(code->byte[r15 + offsetof(A64JitState, check_bit)], u8(0));
-    code->jz(fail);
+    code.cmp(code.byte[r15 + offsetof(A64JitState, check_bit)], u8(0));
+    code.jz(fail);
     EmitTerminal(terminal.then_, initial_location);
-    code->L(fail);
+    code.L(fail);
     EmitTerminal(terminal.else_, initial_location);
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) {
-    code->cmp(code->byte[r15 + offsetof(A64JitState, halt_requested)], u8(0));
-    code->jne(code->GetForceReturnFromRunCodeAddress());
+    code.cmp(code.byte[r15 + offsetof(A64JitState, halt_requested)], u8(0));
+    code.jne(code.GetForceReturnFromRunCodeAddress());
     EmitTerminal(terminal.else_, initial_location);
 }
 
 void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
-    const CodePtr patch_location = code->getCurr();
+    const CodePtr patch_location = code.getCurr();
     if (target_code_ptr) {
-        code->jg(target_code_ptr);
+        code.jg(target_code_ptr);
     } else {
-        code->mov(rax, A64::LocationDescriptor{target_desc}.PC());
-        code->mov(qword[r15 + offsetof(A64JitState, pc)], rax);
-        code->jg(code->GetReturnFromRunCodeAddress());
+        code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
+        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+        code.jg(code.GetReturnFromRunCodeAddress());
     }
-    code->EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
+    code.EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
 }
 
 void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
-    const CodePtr patch_location = code->getCurr();
+    const CodePtr patch_location = code.getCurr();
     if (target_code_ptr) {
-        code->jmp(target_code_ptr);
+        code.jmp(target_code_ptr);
     } else {
-        code->mov(rax, A64::LocationDescriptor{target_desc}.PC());
-        code->mov(qword[r15 + offsetof(A64JitState, pc)], rax);
-        code->jmp(code->GetReturnFromRunCodeAddress());
+        code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
+        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
+        code.jmp(code.GetReturnFromRunCodeAddress());
     }
-    code->EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
+    code.EnsurePatchLocationSize(patch_location, 30); // TODO: Reduce size
 }
 
 void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
     if (!target_code_ptr) {
-        target_code_ptr = code->GetReturnFromRunCodeAddress();
+        target_code_ptr = code.GetReturnFromRunCodeAddress();
     }
-    const CodePtr patch_location = code->getCurr();
-    code->mov(code->rcx, reinterpret_cast<u64>(target_code_ptr));
-    code->EnsurePatchLocationSize(patch_location, 10);
+    const CodePtr patch_location = code.getCurr();
+    code.mov(code.rcx, reinterpret_cast<u64>(target_code_ptr));
+    code.EnsurePatchLocationSize(patch_location, 10);
 }
 
 } // namespace Dynarmic::BackendX64
@@ -27,7 +27,7 @@ struct A64EmitContext final : public EmitContext {
 
 class A64EmitX64 final : public EmitX64 {
 public:
-    A64EmitX64(BlockOfCode* code, A64::UserConfig conf);
+    A64EmitX64(BlockOfCode& code, A64::UserConfig conf);
     ~A64EmitX64() override;
 
     /**
@@ -39,7 +39,7 @@ public:
     explicit Impl(UserConfig conf)
         : conf(conf)
         , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
-        , emitter(&block_of_code, conf)
+        , emitter(block_of_code, conf)
     {}
 
     ~Impl() = default;
@@ -55,7 +55,7 @@ static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t fra
 }
 
 template<typename RegisterArrayT>
-void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size, const RegisterArrayT& regs) {
+void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
     using namespace Xbyak::util;
 
     const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -65,25 +65,25 @@ void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_si
 
     for (HostLoc gpr : regs) {
         if (HostLocIsGPR(gpr)) {
-            code->push(HostLocToReg64(gpr));
+            code.push(HostLocToReg64(gpr));
         }
     }
 
     if (frame_info.stack_subtraction != 0) {
-        code->sub(rsp, u32(frame_info.stack_subtraction));
+        code.sub(rsp, u32(frame_info.stack_subtraction));
     }
 
     size_t xmm_offset = frame_info.xmm_offset;
     for (HostLoc xmm : regs) {
         if (HostLocIsXMM(xmm)) {
-            code->movaps(code->xword[rsp + xmm_offset], HostLocToXmm(xmm));
+            code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
             xmm_offset += XMM_SIZE;
         }
     }
 }
 
 template<typename RegisterArrayT>
-void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size, const RegisterArrayT& regs) {
+void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
     using namespace Xbyak::util;
 
     const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
@@ -94,35 +94,35 @@ void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_siz
     size_t xmm_offset = frame_info.xmm_offset;
     for (HostLoc xmm : regs) {
         if (HostLocIsXMM(xmm)) {
-            code->movaps(HostLocToXmm(xmm), code->xword[rsp + xmm_offset]);
+            code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
             xmm_offset += XMM_SIZE;
         }
     }
 
     if (frame_info.stack_subtraction != 0) {
-        code->add(rsp, u32(frame_info.stack_subtraction));
+        code.add(rsp, u32(frame_info.stack_subtraction));
     }
 
     for (HostLoc gpr : Common::Reverse(regs)) {
         if (HostLocIsGPR(gpr)) {
-            code->pop(HostLocToReg64(gpr));
+            code.pop(HostLocToReg64(gpr));
         }
     }
 }
 
-void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
+void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
     ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
 }
 
-void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
+void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
    ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
 }
 
-void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
+void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
     ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
 }
 
-void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
+void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
     ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
 }
 
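Worth spelling out for the two ABI templates above: pop must mirror push exactly. GPRs are pushed first and popped last (hence the walk through `Common::Reverse`), while XMM registers are saved and restored at fixed `rsp`-relative offsets computed by `CalculateFrameInfo`, which this diff does not show. The frame they build looks roughly like this (an inference from the code above, not a documented layout):

    [ pushed GPRs ]        pushed first, popped last via Common::Reverse
    [ stack subtraction ]  alignment padding + frame_size + shadow space
    [ saved XMMs ]         movaps at rsp + xmm_offset, advancing XMM_SIZE each
    <- rsp after push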
@@ -109,9 +109,9 @@ constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
 
 static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 30, "Invalid total number of registers");
 
-void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
-void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
-void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
-void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
+void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
+void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
+void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
+void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
 
 } // namespace Dynarmic::BackendX64
@@ -38,10 +38,10 @@ BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
     : Xbyak::CodeGenerator(TOTAL_CODE_SIZE)
     , cb(std::move(cb))
     , jsi(jsi)
-    , constant_pool(this, 256)
+    , constant_pool(*this, 256)
 {
     GenRunCode();
-    exception_handler.Register(this);
+    exception_handler.Register(*this);
 }
 
 void BlockOfCode::PreludeComplete() {
@@ -107,12 +107,12 @@ void BlockOfCode::GenRunCode() {
     align();
     run_code_from = getCurr<RunCodeFromFuncType>();
 
-    ABI_PushCalleeSaveRegistersAndAdjustStack(this);
+    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
 
     mov(r15, ABI_PARAM1);
     mov(r14, ABI_PARAM2); // save temporarily in non-volatile register
 
-    cb.GetTicksRemaining->EmitCall(this);
+    cb.GetTicksRemaining->EmitCall(*this);
     mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
     mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
 
@@ -126,18 +126,18 @@ void BlockOfCode::GenRunCode() {
     // 1. It saves all the registers we as a callee need to save.
     // 2. It aligns the stack so that the code the JIT emits can assume
     //    that the stack is appropriately aligned for CALLs.
-    ABI_PushCalleeSaveRegistersAndAdjustStack(this);
+    ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
 
     mov(r15, ABI_PARAM1);
 
-    cb.GetTicksRemaining->EmitCall(this);
+    cb.GetTicksRemaining->EmitCall(*this);
     mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
     mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
 
     L(enter_mxcsr_then_loop);
     SwitchMxcsrOnEntry();
     L(loop);
-    cb.LookupBlock->EmitCall(this);
+    cb.LookupBlock->EmitCall(*this);
 
     jmp(ABI_RETURN);
 
@@ -152,12 +152,12 @@ void BlockOfCode::GenRunCode() {
         SwitchMxcsrOnExit();
     }
 
-    cb.AddTicks->EmitCall(this, [this](Xbyak::Reg64 param1) {
+    cb.AddTicks->EmitCall(*this, [this](Xbyak::Reg64 param1) {
         mov(param1, qword[r15 + jsi.offsetof_cycles_to_run]);
        sub(param1, qword[r15 + jsi.offsetof_cycles_remaining]);
     });
 
-    ABI_PopCalleeSaveRegistersAndAdjustStack(this);
+    ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
     ret();
     };
 
@@ -138,7 +138,7 @@ private:
     ExceptionHandler();
     ~ExceptionHandler();
 
-    void Register(BlockOfCode* code);
+    void Register(BlockOfCode& code);
 private:
     struct Impl;
     std::unique_ptr<Impl> impl;
@@ -9,48 +9,48 @@
 
 namespace Dynarmic::BackendX64 {
 
-void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void()> l) {
+void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void()> l) {
     l();
-    code->CallFunction(fn);
+    code.CallFunction(fn);
 }
 
-void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) {
-    l(code->ABI_PARAM1);
-    code->CallFunction(fn);
+void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) {
+    l(code.ABI_PARAM1);
+    code.CallFunction(fn);
 }
 
-void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
-    l(code->ABI_PARAM1, code->ABI_PARAM2);
-    code->CallFunction(fn);
+void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
    l(code.ABI_PARAM1, code.ABI_PARAM2);
+    code.CallFunction(fn);
 }
 
-void SimpleCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
-    l(code->ABI_PARAM1, code->ABI_PARAM2, code->ABI_PARAM3);
-    code->CallFunction(fn);
+void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
+    l(code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3);
+    code.CallFunction(fn);
 }
 
-void ArgCallback::EmitCall(BlockOfCode* code, std::function<void()> l) {
+void ArgCallback::EmitCall(BlockOfCode& code, std::function<void()> l) {
     l();
-    code->mov(code->ABI_PARAM1, arg);
-    code->CallFunction(fn);
+    code.mov(code.ABI_PARAM1, arg);
+    code.CallFunction(fn);
 }
 
-void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) {
-    l(code->ABI_PARAM2);
-    code->mov(code->ABI_PARAM1, arg);
-    code->CallFunction(fn);
+void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) {
+    l(code.ABI_PARAM2);
+    code.mov(code.ABI_PARAM1, arg);
+    code.CallFunction(fn);
 }
 
-void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
-    l(code->ABI_PARAM2, code->ABI_PARAM3);
-    code->mov(code->ABI_PARAM1, arg);
-    code->CallFunction(fn);
+void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) {
+    l(code.ABI_PARAM2, code.ABI_PARAM3);
+    code.mov(code.ABI_PARAM1, arg);
+    code.CallFunction(fn);
 }
 
-void ArgCallback::EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
-    l(code->ABI_PARAM2, code->ABI_PARAM3, code->ABI_PARAM4);
-    code->mov(code->ABI_PARAM1, arg);
-    code->CallFunction(fn);
+void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) {
+    l(code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4);
+    code.mov(code.ABI_PARAM1, arg);
+    code.CallFunction(fn);
 }
 
 } // namespace Dynarmic::BackendX64
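A word on the protocol these `EmitCall` overloads implement, since it is easy to misread: the `std::function` argument runs at JIT-compile time and is expected to emit the instructions that fill the parameter registers it is handed; `CallFunction` then emits the actual call. `ArgCallback` reserves `ABI_PARAM1` for its bound argument, which is why its lambdas receive `ABI_PARAM2` onward. A hypothetical call site (names illustrative, not from this diff):

// Hypothetical usage of an ArgCallback bound to some `arg`.
// The lambda runs during JIT compilation and receives ABI_PARAM2, since
// ABI_PARAM1 is reserved for the bound argument. Emitted sequence:
//     mov ABI_PARAM2, 42
//     mov ABI_PARAM1, arg
//     call fn
callback.EmitCall(code, [&](Xbyak::Reg64 param1) {
    code.mov(param1, 42);  // runs now, emits code that runs later
});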
@@ -20,10 +20,10 @@ class Callback {
 public:
     virtual ~Callback() = default;
 
-    virtual void EmitCall(BlockOfCode* code, std::function<void()> fn = []{}) = 0;
+    virtual void EmitCall(BlockOfCode& code, std::function<void()> fn = []{}) = 0;
-    virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> fn) = 0;
+    virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> fn) = 0;
-    virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
+    virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
-    virtual void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
+    virtual void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> fn) = 0;
 };
 
 class SimpleCallback final : public Callback {

@@ -33,10 +33,10 @@ public:
 
     ~SimpleCallback() override = default;
 
-    void EmitCall(BlockOfCode* code, std::function<void()> l = []{}) override;
+    void EmitCall(BlockOfCode& code, std::function<void()> l = []{}) override;
-    void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) override;
+    void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) override;
-    void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
+    void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
-    void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
+    void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
 
 private:
     void (*fn)();

@@ -49,10 +49,10 @@ public:
 
     ~ArgCallback() override = default;
 
-    void EmitCall(BlockOfCode* code, std::function<void()> l = []{}) override;
+    void EmitCall(BlockOfCode& code, std::function<void()> l = []{}) override;
-    void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64)> l) override;
+    void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64)> l) override;
-    void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
+    void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64)> l) override;
-    void EmitCall(BlockOfCode* code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
+    void EmitCall(BlockOfCode& code, std::function<void(Xbyak::Reg64, Xbyak::Reg64, Xbyak::Reg64)> l) override;
 
 private:
     void (*fn)();
@@ -12,10 +12,10 @@
 
 namespace Dynarmic::BackendX64 {
 
-ConstantPool::ConstantPool(BlockOfCode* code, size_t size) : code(code), pool_size(size) {
+ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) {
-    code->int3();
+    code.int3();
-    code->align(align_size);
+    code.align(align_size);
-    pool_begin = reinterpret_cast<u8*>(code->AllocateFromCodeSpace(size));
+    pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));
     std::memset(pool_begin, 0, size);
     current_pool_ptr = pool_begin;
 }

@@ -28,7 +28,7 @@ Xbyak::Address ConstantPool::GetConstant(u64 constant) {
         iter = constant_info.emplace(constant, current_pool_ptr).first;
         current_pool_ptr += align_size;
     }
-    return code->xword[code->rip + iter->second];
+    return code.xword[code.rip + iter->second];
 }
 
 } // namespace Dynarmic::BackendX64
@@ -22,7 +22,7 @@ class BlockOfCode;
 /// already exists, its memory location is reused.
 class ConstantPool final {
 public:
-    ConstantPool(BlockOfCode* code, size_t size);
+    ConstantPool(BlockOfCode& code, size_t size);
 
     Xbyak::Address GetConstant(u64 constant);

@@ -31,7 +31,7 @@ private:
 
     std::map<u64, void*> constant_info;
 
-    BlockOfCode* code;
+    BlockOfCode& code;
     size_t pool_size;
     u8* pool_begin;
     u8* current_pool_ptr;
@@ -31,7 +31,7 @@ void EmitContext::EraseInstruction(IR::Inst* inst) {
     inst->ClearArgs();
 }
 
-EmitX64::EmitX64(BlockOfCode* code)
+EmitX64::EmitX64(BlockOfCode& code)
     : code(code) {}
 
 EmitX64::~EmitX64() = default;

@@ -47,7 +47,7 @@ void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
 }
 
 void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) {
-    code->int3();
+    code.int3();
 }
 
 void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {

@@ -63,21 +63,21 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I
     auto iter = block_descriptors.find(target);
     CodePtr target_code_ptr = iter != block_descriptors.end()
                             ? iter->second.entrypoint
-                            : code->GetReturnFromRunCodeAddress();
+                            : code.GetReturnFromRunCodeAddress();
 
-    code->mov(index_reg.cvt32(), dword[r15 + code->GetJitStateInfo().offsetof_rsb_ptr]);
+    code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);
 
-    code->mov(loc_desc_reg, target.Value());
+    code.mov(loc_desc_reg, target.Value());
 
-    patch_information[target].mov_rcx.emplace_back(code->getCurr());
+    patch_information[target].mov_rcx.emplace_back(code.getCurr());
     EmitPatchMovRcx(target_code_ptr);
 
-    code->mov(qword[r15 + index_reg * 8 + code->GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
+    code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
-    code->mov(qword[r15 + index_reg * 8 + code->GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
+    code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
 
-    code->add(index_reg.cvt32(), 1);
+    code.add(index_reg.cvt32(), 1);
-    code->and_(index_reg.cvt32(), u32(code->GetJitStateInfo().rsb_ptr_mask));
+    code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask));
-    code->mov(dword[r15 + code->GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
+    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
 }
 
 void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {

@@ -125,9 +125,9 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
 
     Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
     Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
-    code->cmp(value, 0);
+    code.cmp(value, 0);
-    code->lahf();
+    code.lahf();
-    code->seto(code->al);
+    code.seto(code.al);
     ctx.reg_alloc.DefineValue(inst, nzcv);
 }
 

@@ -141,28 +141,28 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
         value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
         value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
         value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
-        code->mov(nzcv, value);
+        code.mov(nzcv, value);
         ctx.reg_alloc.DefineValue(inst, nzcv);
     } else {
         Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         // TODO: Optimize
-        code->shr(nzcv, 28);
+        code.shr(nzcv, 28);
-        code->imul(nzcv, nzcv, 0b00010000'10000001);
+        code.imul(nzcv, nzcv, 0b00010000'10000001);
-        code->and_(nzcv.cvt8(), 1);
+        code.and_(nzcv.cvt8(), 1);
         ctx.reg_alloc.DefineValue(inst, nzcv);
     }
 }
 
 void EmitX64::EmitAddCycles(size_t cycles) {
     ASSERT(cycles < std::numeric_limits<u32>::max());
-    code->sub(qword[r15 + code->GetJitStateInfo().offsetof_cycles_remaining], static_cast<u32>(cycles));
+    code.sub(qword[r15 + code.GetJitStateInfo().offsetof_cycles_remaining], static_cast<u32>(cycles));
 }
 
 Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
     Xbyak::Label label;
 
     const Xbyak::Reg32 cpsr = eax;
-    code->mov(cpsr, dword[r15 + code->GetJitStateInfo().offsetof_CPSR_nzcv]);
+    code.mov(cpsr, dword[r15 + code.GetJitStateInfo().offsetof_CPSR_nzcv]);
 
     constexpr size_t n_shift = 31;
     constexpr size_t z_shift = 30;

@@ -175,91 +175,91 @@ Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
 
     switch (cond) {
     case IR::Cond::EQ: //z
-        code->test(cpsr, z_mask);
+        code.test(cpsr, z_mask);
-        code->jnz(label);
+        code.jnz(label);
         break;
     case IR::Cond::NE: //!z
-        code->test(cpsr, z_mask);
+        code.test(cpsr, z_mask);
-        code->jz(label);
+        code.jz(label);
         break;
     case IR::Cond::CS: //c
-        code->test(cpsr, c_mask);
+        code.test(cpsr, c_mask);
-        code->jnz(label);
+        code.jnz(label);
         break;
     case IR::Cond::CC: //!c
-        code->test(cpsr, c_mask);
+        code.test(cpsr, c_mask);
-        code->jz(label);
+        code.jz(label);
         break;
     case IR::Cond::MI: //n
-        code->test(cpsr, n_mask);
+        code.test(cpsr, n_mask);
-        code->jnz(label);
+        code.jnz(label);
         break;
     case IR::Cond::PL: //!n
-        code->test(cpsr, n_mask);
+        code.test(cpsr, n_mask);
-        code->jz(label);
+        code.jz(label);
         break;
     case IR::Cond::VS: //v
-        code->test(cpsr, v_mask);
+        code.test(cpsr, v_mask);
-        code->jnz(label);
+        code.jnz(label);
         break;
     case IR::Cond::VC: //!v
-        code->test(cpsr, v_mask);
+        code.test(cpsr, v_mask);
-        code->jz(label);
+        code.jz(label);
         break;
     case IR::Cond::HI: { //c & !z
-        code->and_(cpsr, z_mask | c_mask);
+        code.and_(cpsr, z_mask | c_mask);
-        code->cmp(cpsr, c_mask);
+        code.cmp(cpsr, c_mask);
-        code->je(label);
+        code.je(label);
         break;
     }
     case IR::Cond::LS: { //!c | z
-        code->and_(cpsr, z_mask | c_mask);
+        code.and_(cpsr, z_mask | c_mask);
-        code->cmp(cpsr, c_mask);
+        code.cmp(cpsr, c_mask);
-        code->jne(label);
+        code.jne(label);
         break;
     }
     case IR::Cond::GE: { // n == v
-        code->and_(cpsr, n_mask | v_mask);
+        code.and_(cpsr, n_mask | v_mask);
-        code->jz(label);
+        code.jz(label);
-        code->cmp(cpsr, n_mask | v_mask);
+        code.cmp(cpsr, n_mask | v_mask);
-        code->je(label);
+        code.je(label);
         break;
     }
     case IR::Cond::LT: { // n != v
         Xbyak::Label fail;
-        code->and_(cpsr, n_mask | v_mask);
+        code.and_(cpsr, n_mask | v_mask);
-        code->jz(fail);
+        code.jz(fail);
-        code->cmp(cpsr, n_mask | v_mask);
+        code.cmp(cpsr, n_mask | v_mask);
-        code->jne(label);
+        code.jne(label);
-        code->L(fail);
+        code.L(fail);
         break;
     }
     case IR::Cond::GT: { // !z & (n == v)
         const Xbyak::Reg32 tmp1 = ebx;
         const Xbyak::Reg32 tmp2 = esi;
-        code->mov(tmp1, cpsr);
+        code.mov(tmp1, cpsr);
-        code->mov(tmp2, cpsr);
+        code.mov(tmp2, cpsr);
-        code->shr(tmp1, n_shift);
+        code.shr(tmp1, n_shift);
-        code->shr(tmp2, v_shift);
+        code.shr(tmp2, v_shift);
-        code->shr(cpsr, z_shift);
+        code.shr(cpsr, z_shift);
-        code->xor_(tmp1, tmp2);
+        code.xor_(tmp1, tmp2);
-        code->or_(tmp1, cpsr);
+        code.or_(tmp1, cpsr);
-        code->test(tmp1, 1);
+        code.test(tmp1, 1);
-        code->jz(label);
+        code.jz(label);
         break;
     }
     case IR::Cond::LE: { // z | (n != v)
         const Xbyak::Reg32 tmp1 = ebx;
         const Xbyak::Reg32 tmp2 = esi;
-        code->mov(tmp1, cpsr);
+        code.mov(tmp1, cpsr);
-        code->mov(tmp2, cpsr);
+        code.mov(tmp2, cpsr);
-        code->shr(tmp1, n_shift);
+        code.shr(tmp1, n_shift);
-        code->shr(tmp2, v_shift);
+        code.shr(tmp2, v_shift);
-        code->shr(cpsr, z_shift);
+        code.shr(cpsr, z_shift);
-        code->xor_(tmp1, tmp2);
+        code.xor_(tmp1, tmp2);
-        code->or_(tmp1, cpsr);
+        code.or_(tmp1, cpsr);
-        code->test(tmp1, 1);
+        code.test(tmp1, 1);
-        code->jnz(label);
+        code.jnz(label);
         break;
     }
     default:

@@ -281,7 +281,7 @@ void EmitX64::EmitCondPrelude(const IR::Block& block) {
     Xbyak::Label pass = EmitCond(block.GetCondition());
     EmitAddCycles(block.ConditionFailedCycleCount());
     EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());
-    code->L(pass);
+    code.L(pass);
 }
 
 void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {

@@ -296,25 +296,25 @@ void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial
 }
 
 void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
-    const CodePtr save_code_ptr = code->getCurr();
+    const CodePtr save_code_ptr = code.getCurr();
     const PatchInformation& patch_info = patch_information[desc];
 
     for (CodePtr location : patch_info.jg) {
-        code->SetCodePtr(location);
+        code.SetCodePtr(location);
         EmitPatchJg(desc, bb);
     }
 
     for (CodePtr location : patch_info.jmp) {
-        code->SetCodePtr(location);
+        code.SetCodePtr(location);
         EmitPatchJmp(desc, bb);
     }
 
     for (CodePtr location : patch_info.mov_rcx) {
-        code->SetCodePtr(location);
+        code.SetCodePtr(location);
         EmitPatchMovRcx(bb);
     }
 
-    code->SetCodePtr(save_code_ptr);
+    code.SetCodePtr(save_code_ptr);
 }
 
 void EmitX64::Unpatch(const IR::LocationDescriptor& desc) {
@@ -48,7 +48,7 @@ public:
         size_t size; // Length in bytes of emitted code
     };
 
-    EmitX64(BlockOfCode* code);
+    EmitX64(BlockOfCode& code);
     virtual ~EmitX64();
 
     /// Looks up an emitted host block in the cache.

@@ -100,7 +100,7 @@ protected:
     virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;
 
     // State
-    BlockOfCode* code;
+    BlockOfCode& code;
     std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
     std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
 };
@@ -44,21 +44,21 @@ static void EmitMixColumns(std::array<Argument, 3> args, EmitContext& ctx, Block
 void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tAESNI)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAESNI)) {
         const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
         const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
 
-        code->aesimc(result, operand);
+        code.aesimc(result, operand);
 
         ctx.reg_alloc.DefineValue(inst, result);
     } else {
-        EmitMixColumns(args, ctx, *code, inst, Common::InverseMixColumns);
+        EmitMixColumns(args, ctx, code, inst, Common::InverseMixColumns);
     }
 }
 
 void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    EmitMixColumns(args, ctx, *code, inst, Common::MixColumns);
+    EmitMixColumns(args, ctx, code, inst, Common::MixColumns);
 }
 
 } // namespace Dynarmic::BackendX64
@@ -42,35 +42,35 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
 }
 
 void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32Castagnoli(*code, ctx, inst, 8);
+    EmitCRC32Castagnoli(code, ctx, inst, 8);
 }
 
 void EmitX64::EmitCRC32Castagnoli16(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32Castagnoli(*code, ctx, inst, 16);
+    EmitCRC32Castagnoli(code, ctx, inst, 16);
 }
 
 void EmitX64::EmitCRC32Castagnoli32(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32Castagnoli(*code, ctx, inst, 32);
+    EmitCRC32Castagnoli(code, ctx, inst, 32);
 }
 
 void EmitX64::EmitCRC32Castagnoli64(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32Castagnoli(*code, ctx, inst, 64);
+    EmitCRC32Castagnoli(code, ctx, inst, 64);
 }
 
 void EmitX64::EmitCRC32ISO8(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32ISO(*code, ctx, inst, 8);
+    EmitCRC32ISO(code, ctx, inst, 8);
 }
 
 void EmitX64::EmitCRC32ISO16(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32ISO(*code, ctx, inst, 16);
+    EmitCRC32ISO(code, ctx, inst, 16);
 }
 
 void EmitX64::EmitCRC32ISO32(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32ISO(*code, ctx, inst, 32);
+    EmitCRC32ISO(code, ctx, inst, 32);
 }
 
 void EmitX64::EmitCRC32ISO64(EmitContext& ctx, IR::Inst* inst) {
-    EmitCRC32ISO(*code, ctx, inst, 64);
+    EmitCRC32ISO(code, ctx, inst, 64);
 }
 
 } // namespace Dynarmic::BackendX64
File diff suppressed because it is too large
@@ -29,96 +29,96 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
 constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
 constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
 
-static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
+static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     Xbyak::Label end;
 
     // We need to report back whether we've found a denormal on input.
     // SSE doesn't do this for us when SSE's DAZ is enabled.
 
-    code->movd(gpr_scratch, xmm_value);
+    code.movd(gpr_scratch, xmm_value);
-    code->and_(gpr_scratch, u32(0x7FFFFFFF));
+    code.and_(gpr_scratch, u32(0x7FFFFFFF));
-    code->sub(gpr_scratch, u32(1));
+    code.sub(gpr_scratch, u32(1));
-    code->cmp(gpr_scratch, u32(0x007FFFFE));
+    code.cmp(gpr_scratch, u32(0x007FFFFE));
-    code->ja(end);
+    code.ja(end);
-    code->pxor(xmm_value, xmm_value);
+    code.pxor(xmm_value, xmm_value);
-    code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
+    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
-    code->L(end);
+    code.L(end);
 }
 
-static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
+static void DenormalsAreZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
     Xbyak::Label end;
 
-    auto mask = code->MConst(f64_non_sign_mask);
+    auto mask = code.MConst(f64_non_sign_mask);
     mask.setBit(64);
-    auto penult_denormal = code->MConst(f64_penultimate_positive_denormal);
+    auto penult_denormal = code.MConst(f64_penultimate_positive_denormal);
     penult_denormal.setBit(64);
 
-    code->movq(gpr_scratch, xmm_value);
+    code.movq(gpr_scratch, xmm_value);
-    code->and_(gpr_scratch, mask);
+    code.and_(gpr_scratch, mask);
-    code->sub(gpr_scratch, u32(1));
+    code.sub(gpr_scratch, u32(1));
-    code->cmp(gpr_scratch, penult_denormal);
+    code.cmp(gpr_scratch, penult_denormal);
-    code->ja(end);
+    code.ja(end);
-    code->pxor(xmm_value, xmm_value);
+    code.pxor(xmm_value, xmm_value);
-    code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
+    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_IDC], u32(1 << 7));
-    code->L(end);
+    code.L(end);
 }
 
-static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
+static void FlushToZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     Xbyak::Label end;
 
-    code->movd(gpr_scratch, xmm_value);
+    code.movd(gpr_scratch, xmm_value);
-    code->and_(gpr_scratch, u32(0x7FFFFFFF));
+    code.and_(gpr_scratch, u32(0x7FFFFFFF));
-    code->sub(gpr_scratch, u32(1));
+    code.sub(gpr_scratch, u32(1));
-    code->cmp(gpr_scratch, u32(0x007FFFFE));
+    code.cmp(gpr_scratch, u32(0x007FFFFE));
-    code->ja(end);
+    code.ja(end);
-    code->pxor(xmm_value, xmm_value);
+    code.pxor(xmm_value, xmm_value);
-    code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
+    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
-    code->L(end);
+    code.L(end);
 }
 
-static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
+static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
     Xbyak::Label end;
 
-    auto mask = code->MConst(f64_non_sign_mask);
+    auto mask = code.MConst(f64_non_sign_mask);
     mask.setBit(64);
-    auto penult_denormal = code->MConst(f64_penultimate_positive_denormal);
+    auto penult_denormal = code.MConst(f64_penultimate_positive_denormal);
     penult_denormal.setBit(64);
 
-    code->movq(gpr_scratch, xmm_value);
+    code.movq(gpr_scratch, xmm_value);
-    code->and_(gpr_scratch, mask);
+    code.and_(gpr_scratch, mask);
-    code->sub(gpr_scratch, u32(1));
+    code.sub(gpr_scratch, u32(1));
-    code->cmp(gpr_scratch, penult_denormal);
+    code.cmp(gpr_scratch, penult_denormal);
-    code->ja(end);
+    code.ja(end);
-    code->pxor(xmm_value, xmm_value);
+    code.pxor(xmm_value, xmm_value);
-    code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
+    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
-    code->L(end);
+    code.L(end);
 }
 
-static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) {
+static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
     Xbyak::Label end;
 
-    code->ucomiss(xmm_value, xmm_value);
+    code.ucomiss(xmm_value, xmm_value);
-    code->jnp(end);
+    code.jnp(end);
-    code->movaps(xmm_value, code->MConst(f32_nan));
+    code.movaps(xmm_value, code.MConst(f32_nan));
-    code->L(end);
+    code.L(end);
 }
 
-static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) {
+static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
     Xbyak::Label end;
 
-    code->ucomisd(xmm_value, xmm_value);
+    code.ucomisd(xmm_value, xmm_value);
-    code->jnp(end);
+    code.jnp(end);
-    code->movaps(xmm_value, code->MConst(f64_nan));
+    code.movaps(xmm_value, code.MConst(f64_nan));
-    code->L(end);
+    code.L(end);
 }
 
-static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
+static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
-    code->pxor(xmm_scratch, xmm_scratch);
+    code.pxor(xmm_scratch, xmm_scratch);
-    code->cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
+    code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
-    code->pand(xmm_value, xmm_scratch);
+    code.pand(xmm_value, xmm_scratch);
 }
 
-static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
@@ -129,7 +129,7 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
         DenormalsAreZero32(code, result, gpr_scratch);
         DenormalsAreZero32(code, operand, gpr_scratch);
     }
-    (code->*fn)(result, operand);
+    (code.*fn)(result, operand);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch);
     }

@@ -140,7 +140,7 @@ static void FPThreeOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

@@ -151,7 +151,7 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
         DenormalsAreZero64(code, result, gpr_scratch);
         DenormalsAreZero64(code, operand, gpr_scratch);
     }
-    (code->*fn)(result, operand);
+    (code.*fn)(result, operand);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
     }

@@ -162,7 +162,7 @@ static void FPThreeOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, voi
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

@@ -172,7 +172,7 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void
         DenormalsAreZero32(code, result, gpr_scratch);
     }
 
-    (code->*fn)(result, result);
+    (code.*fn)(result, result);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch);
     }

@@ -183,7 +183,7 @@ static void FPTwoOp32(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
-static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
+static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

@@ -193,7 +193,7 @@ static void FPTwoOp64(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void
         DenormalsAreZero64(code, result, gpr_scratch);
     }
 
-    (code->*fn)(result, result);
+    (code.*fn)(result, result);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
     }

@@ -208,7 +208,7 @@ void EmitX64::EmitFPAbs32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
-    code->pand(result, code->MConst(f32_non_sign_mask));
+    code.pand(result, code.MConst(f32_non_sign_mask));
 
     ctx.reg_alloc.DefineValue(inst, result);
 }

@@ -217,7 +217,7 @@ void EmitX64::EmitFPAbs64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
-    code->pand(result, code->MConst(f64_non_sign_mask));
+    code.pand(result, code.MConst(f64_non_sign_mask));
 
     ctx.reg_alloc.DefineValue(inst, result);
 }

@@ -226,7 +226,7 @@ void EmitX64::EmitFPNeg32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
-    code->pxor(result, code->MConst(f32_negative_zero));
+    code.pxor(result, code.MConst(f32_negative_zero));
 
     ctx.reg_alloc.DefineValue(inst, result);
 }

@@ -235,7 +235,7 @@ void EmitX64::EmitFPNeg64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
 
-    code->pxor(result, code->MConst(f64_negative_zero));
+    code.pxor(result, code.MConst(f64_negative_zero));
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -280,16 +280,16 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) {
     FPThreeOp64(code, ctx, inst, &Xbyak::CodeGenerator::subsd);
 }
 
-static void SetFpscrNzcvFromFlags(BlockOfCode* code, EmitContext& ctx) {
+static void SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) {
     ctx.reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
     Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
 
-    code->mov(nzcv, 0x28630000);
+    code.mov(nzcv, 0x28630000);
-    code->sete(cl);
+    code.sete(cl);
-    code->rcl(cl, 3);
+    code.rcl(cl, 3);
-    code->shl(nzcv, cl);
+    code.shl(nzcv, cl);
-    code->and_(nzcv, 0xF0000000);
+    code.and_(nzcv, 0xF0000000);
-    code->mov(dword[r15 + code->GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv);
+    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_nzcv], nzcv);
 }
 
 void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {

@@ -299,9 +299,9 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) {
     bool exc_on_qnan = args[2].GetImmediateU1();
 
     if (exc_on_qnan) {
-        code->comiss(reg_a, reg_b);
+        code.comiss(reg_a, reg_b);
     } else {
-        code->ucomiss(reg_a, reg_b);
+        code.ucomiss(reg_a, reg_b);
     }
 
     SetFpscrNzcvFromFlags(code, ctx);

@@ -314,9 +314,9 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) {
     bool exc_on_qnan = args[2].GetImmediateU1();
 
     if (exc_on_qnan) {
-        code->comisd(reg_a, reg_b);
+        code.comisd(reg_a, reg_b);
     } else {
-        code->ucomisd(reg_a, reg_b);
+        code.ucomisd(reg_a, reg_b);
     }
 
     SetFpscrNzcvFromFlags(code, ctx);

@@ -330,7 +330,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
     if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero32(code, result, gpr_scratch.cvt32());
     }
-    code->cvtss2sd(result, result);
+    code.cvtss2sd(result, result);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
     }

@@ -349,7 +349,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
     if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero64(code, result, gpr_scratch);
     }
-    code->cvtsd2ss(result, result);
+    code.cvtsd2ss(result, result);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch.cvt32());
     }
@@ -373,22 +373,22 @@ void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
     if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero32(code, from, to);
     }
-    code->cvtss2sd(from, from);
+    code.cvtss2sd(from, from);
     // First time is to set flags
     if (round_towards_zero) {
-        code->cvttsd2si(to, from); // 32 bit gpr
+        code.cvttsd2si(to, from); // 32 bit gpr
     } else {
-        code->cvtsd2si(to, from); // 32 bit gpr
+        code.cvtsd2si(to, from); // 32 bit gpr
     }
     // Clamp to output range
     ZeroIfNaN64(code, from, xmm_scratch);
-    code->minsd(from, code->MConst(f64_max_s32));
+    code.minsd(from, code.MConst(f64_max_s32));
-    code->maxsd(from, code->MConst(f64_min_s32));
+    code.maxsd(from, code.MConst(f64_min_s32));
     // Second time is for real
     if (round_towards_zero) {
-        code->cvttsd2si(to, from); // 32 bit gpr
+        code.cvttsd2si(to, from); // 32 bit gpr
     } else {
-        code->cvtsd2si(to, from); // 32 bit gpr
+        code.cvtsd2si(to, from); // 32 bit gpr
     }
 
     ctx.reg_alloc.DefineValue(inst, to);

@@ -412,19 +412,19 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
         if (ctx.FPSCR_FTZ()) {
             DenormalsAreZero32(code, from, to);
         }
-        code->cvtss2sd(from, from);
+        code.cvtss2sd(from, from);
         ZeroIfNaN64(code, from, xmm_scratch);
         // Bring into SSE range
-        code->addsd(from, code->MConst(f64_min_s32));
+        code.addsd(from, code.MConst(f64_min_s32));
         // First time is to set flags
-        code->cvtsd2si(to, from); // 32 bit gpr
+        code.cvtsd2si(to, from); // 32 bit gpr
         // Clamp to output range
-        code->minsd(from, code->MConst(f64_max_s32));
+        code.minsd(from, code.MConst(f64_max_s32));
-        code->maxsd(from, code->MConst(f64_min_s32));
+        code.maxsd(from, code.MConst(f64_min_s32));
         // Actually convert
-        code->cvtsd2si(to, from); // 32 bit gpr
+        code.cvtsd2si(to, from); // 32 bit gpr
         // Bring back into original range
-        code->add(to, u32(2147483648u));
+        code.add(to, u32(2147483648u));
     } else {
         Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm();
         Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -432,25 +432,25 @@ void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
         if (ctx.FPSCR_FTZ()) {
             DenormalsAreZero32(code, from, to);
         }
-        code->cvtss2sd(from, from);
+        code.cvtss2sd(from, from);
         ZeroIfNaN64(code, from, xmm_scratch);
         // Generate masks if out-of-signed-range
-        code->movaps(xmm_mask, code->MConst(f64_max_s32));
+        code.movaps(xmm_mask, code.MConst(f64_max_s32));
-        code->cmpltsd(xmm_mask, from);
+        code.cmpltsd(xmm_mask, from);
-        code->movd(gpr_mask, xmm_mask);
+        code.movd(gpr_mask, xmm_mask);
-        code->pand(xmm_mask, code->MConst(f64_min_s32));
+        code.pand(xmm_mask, code.MConst(f64_min_s32));
-        code->and_(gpr_mask, u32(2147483648u));
+        code.and_(gpr_mask, u32(2147483648u));
         // Bring into range if necessary
-        code->addsd(from, xmm_mask);
+        code.addsd(from, xmm_mask);
         // First time is to set flags
-        code->cvttsd2si(to, from); // 32 bit gpr
+        code.cvttsd2si(to, from); // 32 bit gpr
         // Clamp to output range
-        code->minsd(from, code->MConst(f64_max_s32));
+        code.minsd(from, code.MConst(f64_max_s32));
-        code->maxsd(from, code->MConst(f64_min_u32));
+        code.maxsd(from, code.MConst(f64_min_u32));
         // Actually convert
-        code->cvttsd2si(to, from); // 32 bit gpr
+        code.cvttsd2si(to, from); // 32 bit gpr
         // Bring back into original range if necessary
-        code->add(to, gpr_mask);
+        code.add(to, gpr_mask);
     }
 
     ctx.reg_alloc.DefineValue(inst, to);

@@ -471,19 +471,19 @@ void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
     }
     // First time is to set flags
    if (round_towards_zero) {
-        code->cvttsd2si(gpr_scratch, from); // 32 bit gpr
+        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
     } else {
-        code->cvtsd2si(gpr_scratch, from); // 32 bit gpr
+        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
     }
     // Clamp to output range
     ZeroIfNaN64(code, from, xmm_scratch);
-    code->minsd(from, code->MConst(f64_max_s32));
+    code.minsd(from, code.MConst(f64_max_s32));
-    code->maxsd(from, code->MConst(f64_min_s32));
+    code.maxsd(from, code.MConst(f64_min_s32));
     // Second time is for real
     if (round_towards_zero) {
-        code->cvttsd2si(to, from); // 32 bit gpr
+        code.cvttsd2si(to, from); // 32 bit gpr
     } else {
-        code->cvtsd2si(to, from); // 32 bit gpr
+        code.cvtsd2si(to, from); // 32 bit gpr
     }
 
     ctx.reg_alloc.DefineValue(inst, to);

@@ -507,16 +507,16 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
         }
         ZeroIfNaN64(code, from, xmm_scratch);
         // Bring into SSE range
-        code->addsd(from, code->MConst(f64_min_s32));
+        code.addsd(from, code.MConst(f64_min_s32));
         // First time is to set flags
-        code->cvtsd2si(gpr_scratch, from); // 32 bit gpr
+        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
         // Clamp to output range
-        code->minsd(from, code->MConst(f64_max_s32));
+        code.minsd(from, code.MConst(f64_max_s32));
-        code->maxsd(from, code->MConst(f64_min_s32));
+        code.maxsd(from, code.MConst(f64_min_s32));
         // Actually convert
-        code->cvtsd2si(to, from); // 32 bit gpr
+        code.cvtsd2si(to, from); // 32 bit gpr
         // Bring back into original range
-        code->add(to, u32(2147483648u));
+        code.add(to, u32(2147483648u));
     } else {
         Xbyak::Xmm xmm_mask = ctx.reg_alloc.ScratchXmm();
         Xbyak::Reg32 gpr_mask = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -526,22 +526,22 @@ void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
         }
         ZeroIfNaN64(code, from, xmm_scratch);
         // Generate masks if out-of-signed-range
-        code->movaps(xmm_mask, code->MConst(f64_max_s32));
+        code.movaps(xmm_mask, code.MConst(f64_max_s32));
-        code->cmpltsd(xmm_mask, from);
+        code.cmpltsd(xmm_mask, from);
-        code->movd(gpr_mask, xmm_mask);
+        code.movd(gpr_mask, xmm_mask);
-        code->pand(xmm_mask, code->MConst(f64_min_s32));
+        code.pand(xmm_mask, code.MConst(f64_min_s32));
-        code->and_(gpr_mask, u32(2147483648u));
+        code.and_(gpr_mask, u32(2147483648u));
         // Bring into range if necessary
-        code->addsd(from, xmm_mask);
+        code.addsd(from, xmm_mask);
         // First time is to set flags
-        code->cvttsd2si(gpr_scratch, from); // 32 bit gpr
+        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
         // Clamp to output range
-        code->minsd(from, code->MConst(f64_max_s32));
+        code.minsd(from, code.MConst(f64_max_s32));
-        code->maxsd(from, code->MConst(f64_min_u32));
+        code.maxsd(from, code.MConst(f64_min_u32));
         // Actually convert
-        code->cvttsd2si(to, from); // 32 bit gpr
+        code.cvttsd2si(to, from); // 32 bit gpr
         // Bring back into original range if necessary
-        code->add(to, gpr_mask);
+        code.add(to, gpr_mask);
     }
 
     ctx.reg_alloc.DefineValue(inst, to);

@@ -554,7 +554,7 @@ void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     bool round_to_nearest = args[1].GetImmediateU1();
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
-    code->cvtsi2ss(to, from);
+    code.cvtsi2ss(to, from);
 
     ctx.reg_alloc.DefineValue(inst, to);
 }

@@ -567,8 +567,8 @@ void EmitX64::EmitFPU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
     // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
-    code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+    code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
-    code->cvtsi2ss(to, from);
+    code.cvtsi2ss(to, from);
 
     ctx.reg_alloc.DefineValue(inst, to);
 }

@@ -580,7 +580,7 @@ void EmitX64::EmitFPS32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     bool round_to_nearest = args[1].GetImmediateU1();
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
-    code->cvtsi2sd(to, from);
+    code.cvtsi2sd(to, from);
 
     ctx.reg_alloc.DefineValue(inst, to);
 }

@@ -593,8 +593,8 @@ void EmitX64::EmitFPU32ToDouble(EmitContext& ctx, IR::Inst* inst) {
     ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");
 
     // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed
-    code->mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
+    code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary
-    code->cvtsi2sd(to, from);
+    code.cvtsi2sd(to, from);
 
     ctx.reg_alloc.DefineValue(inst, to);
 }
@@ -23,18 +23,18 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
-    code->paddb(xmm_a, xmm_b);
+    code.paddb(xmm_a, xmm_b);
 
     if (ge_inst) {
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
 
-        code->pcmpeqb(ones, ones);
+        code.pcmpeqb(ones, ones);
 
-        code->movdqa(xmm_ge, xmm_a);
+        code.movdqa(xmm_ge, xmm_a);
-        code->pminub(xmm_ge, xmm_b);
+        code.pminub(xmm_ge, xmm_b);
-        code->pcmpeqb(xmm_ge, xmm_b);
+        code.pcmpeqb(xmm_ge, xmm_b);
-        code->pxor(xmm_ge, ones);
+        code.pxor(xmm_ge, ones);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);

@@ -54,18 +54,18 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
-        code->pxor(xmm_ge, xmm_ge);
+        code.pxor(xmm_ge, xmm_ge);
-        code->movdqa(saturated_sum, xmm_a);
+        code.movdqa(saturated_sum, xmm_a);
-        code->paddsb(saturated_sum, xmm_b);
+        code.paddsb(saturated_sum, xmm_b);
-        code->pcmpgtb(xmm_ge, saturated_sum);
+        code.pcmpgtb(xmm_ge, saturated_sum);
-        code->pcmpeqb(saturated_sum, saturated_sum);
+        code.pcmpeqb(saturated_sum, saturated_sum);
-        code->pxor(xmm_ge, saturated_sum);
+        code.pxor(xmm_ge, saturated_sum);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);
     }
 
-    code->paddb(xmm_a, xmm_b);
+    code.paddb(xmm_a, xmm_b);
 
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -77,19 +77,19 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
 
-    code->paddw(xmm_a, xmm_b);
+    code.paddw(xmm_a, xmm_b);
 
     if (ge_inst) {
-        if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
             Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
             Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
 
-            code->pcmpeqb(ones, ones);
+            code.pcmpeqb(ones, ones);
 
-            code->movdqa(xmm_ge, xmm_a);
+            code.movdqa(xmm_ge, xmm_a);
-            code->pminuw(xmm_ge, xmm_b);
+            code.pminuw(xmm_ge, xmm_b);
-            code->pcmpeqw(xmm_ge, xmm_b);
+            code.pcmpeqw(xmm_ge, xmm_b);
-            code->pxor(xmm_ge, ones);
+            code.pxor(xmm_ge, ones);
 
             ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
             ctx.EraseInstruction(ge_inst);

@@ -98,11 +98,11 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
             Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
 
             // !(b <= a+b) == b > a+b
-            code->movdqa(tmp_a, xmm_a);
+            code.movdqa(tmp_a, xmm_a);
-            code->movdqa(tmp_b, xmm_b);
+            code.movdqa(tmp_b, xmm_b);
-            code->paddw(tmp_a, code->MConst(0x80008000));
+            code.paddw(tmp_a, code.MConst(0x80008000));
-            code->paddw(tmp_b, code->MConst(0x80008000));
+            code.paddw(tmp_b, code.MConst(0x80008000));
-            code->pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
+            code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
 
             ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
             ctx.EraseInstruction(ge_inst);

@@ -123,18 +123,18 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
 
-        code->pxor(xmm_ge, xmm_ge);
+        code.pxor(xmm_ge, xmm_ge);
-        code->movdqa(saturated_sum, xmm_a);
+        code.movdqa(saturated_sum, xmm_a);
-        code->paddsw(saturated_sum, xmm_b);
+        code.paddsw(saturated_sum, xmm_b);
-        code->pcmpgtw(xmm_ge, saturated_sum);
+        code.pcmpgtw(xmm_ge, saturated_sum);
-        code->pcmpeqw(saturated_sum, saturated_sum);
+        code.pcmpeqw(saturated_sum, saturated_sum);
-        code->pxor(xmm_ge, saturated_sum);
+        code.pxor(xmm_ge, saturated_sum);
 
         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);
     }
 
-    code->paddw(xmm_a, xmm_b);
+    code.paddw(xmm_a, xmm_b);
 
     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
@@ -149,15 +149,15 @@ void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
     if (ge_inst) {
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

-        code->movdqa(xmm_ge, xmm_a);
+        code.movdqa(xmm_ge, xmm_a);
-        code->pmaxub(xmm_ge, xmm_b);
+        code.pmaxub(xmm_ge, xmm_b);
-        code->pcmpeqb(xmm_ge, xmm_a);
+        code.pcmpeqb(xmm_ge, xmm_a);

         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);
     }

-    code->psubb(xmm_a, xmm_b);
+    code.psubb(xmm_a, xmm_b);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -173,18 +173,18 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

-        code->pxor(xmm_ge, xmm_ge);
+        code.pxor(xmm_ge, xmm_ge);
-        code->movdqa(saturated_sum, xmm_a);
+        code.movdqa(saturated_sum, xmm_a);
-        code->psubsb(saturated_sum, xmm_b);
+        code.psubsb(saturated_sum, xmm_b);
-        code->pcmpgtb(xmm_ge, saturated_sum);
+        code.pcmpgtb(xmm_ge, saturated_sum);
-        code->pcmpeqb(saturated_sum, saturated_sum);
+        code.pcmpeqb(saturated_sum, saturated_sum);
-        code->pxor(xmm_ge, saturated_sum);
+        code.pxor(xmm_ge, saturated_sum);

         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);
     }

-    code->psubb(xmm_a, xmm_b);
+    code.psubb(xmm_a, xmm_b);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -197,22 +197,22 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

-        code->psubw(xmm_a, xmm_b);
+        code.psubw(xmm_a, xmm_b);

         ctx.reg_alloc.DefineValue(inst, xmm_a);
         return;
     }

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

-        code->movdqa(xmm_ge, xmm_a);
+        code.movdqa(xmm_ge, xmm_a);
-        code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
+        code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
-        code->pcmpeqw(xmm_ge, xmm_a);
+        code.pcmpeqw(xmm_ge, xmm_a);

-        code->psubw(xmm_a, xmm_b);
+        code.psubw(xmm_a, xmm_b);

         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);

@@ -226,14 +226,14 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();

         // (a >= b) == !(b > a)
-        code->pcmpeqb(ones, ones);
+        code.pcmpeqb(ones, ones);
-        code->paddw(xmm_a, code->MConst(0x80008000));
+        code.paddw(xmm_a, code.MConst(0x80008000));
-        code->paddw(xmm_b, code->MConst(0x80008000));
+        code.paddw(xmm_b, code.MConst(0x80008000));
-        code->movdqa(xmm_ge, xmm_b);
+        code.movdqa(xmm_ge, xmm_b);
-        code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
+        code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
-        code->pxor(xmm_ge, ones);
+        code.pxor(xmm_ge, ones);

-        code->psubw(xmm_a, xmm_b);
+        code.psubw(xmm_a, xmm_b);

         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);

@@ -251,18 +251,18 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();

-        code->pxor(xmm_ge, xmm_ge);
+        code.pxor(xmm_ge, xmm_ge);
-        code->movdqa(saturated_diff, xmm_a);
+        code.movdqa(saturated_diff, xmm_a);
-        code->psubsw(saturated_diff, xmm_b);
+        code.psubsw(saturated_diff, xmm_b);
-        code->pcmpgtw(xmm_ge, saturated_diff);
+        code.pcmpgtw(xmm_ge, saturated_diff);
-        code->pcmpeqw(saturated_diff, saturated_diff);
+        code.pcmpeqw(saturated_diff, saturated_diff);
-        code->pxor(xmm_ge, saturated_diff);
+        code.pxor(xmm_ge, saturated_diff);

         ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
         ctx.EraseInstruction(ge_inst);
     }

-    code->psubw(xmm_a, xmm_b);
+    code.psubw(xmm_a, xmm_b);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -280,11 +280,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
         // Therefore,
         // ~pavg(~a, ~b) == (a + b) >> 1

-        code->pcmpeqb(ones, ones);
+        code.pcmpeqb(ones, ones);
-        code->pxor(xmm_a, ones);
+        code.pxor(xmm_a, ones);
-        code->pxor(xmm_b, ones);
+        code.pxor(xmm_b, ones);
-        code->pavgb(xmm_a, xmm_b);
+        code.pavgb(xmm_a, xmm_b);
-        code->pxor(xmm_a, ones);
+        code.pxor(xmm_a, ones);

         ctx.reg_alloc.DefineValue(inst, xmm_a);
     } else {
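Note: pavgb computes (x + y + 1) >> 1 with a 9-bit intermediate, i.e. it rounds up; complementing the inputs and the output turns that into the truncating average required here. A self-contained check of the identity used above (sketch, not part of the commit):

    #include <cassert>
    #include <cstdint>

    int main() {
        for (int a = 0; a < 256; ++a) {
            for (int b = 0; b < 256; ++b) {
                const uint8_t na = static_cast<uint8_t>(~a);
                const uint8_t nb = static_cast<uint8_t>(~b);
                const uint8_t avg = static_cast<uint8_t>((na + nb + 1) >> 1); // pavgb semantics
                assert(static_cast<uint8_t>(~avg) == ((a + b) >> 1));         // ~pavg(~a,~b) == (a+b)>>1
            }
        }
    }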
@@ -299,12 +299,12 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
         // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
         // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.

-        code->mov(xor_a_b, reg_a);
+        code.mov(xor_a_b, reg_a);
-        code->and_(and_a_b, reg_b);
+        code.and_(and_a_b, reg_b);
-        code->xor_(xor_a_b, reg_b);
+        code.xor_(xor_a_b, reg_b);
-        code->shr(xor_a_b, 1);
+        code.shr(xor_a_b, 1);
-        code->and_(xor_a_b, 0x7F7F7F7F);
+        code.and_(xor_a_b, 0x7F7F7F7F);
-        code->add(result, xor_a_b);
+        code.add(result, xor_a_b);

         ctx.reg_alloc.DefineValue(inst, result);
     }
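Note: the GPR fallback relies on x + y == 2*(x & y) + (x ^ y): the AND holds the carry bits (already weighted by two) and the XOR the carry-less sum, so (x & y) + ((x ^ y) >> 1) is the truncating average without intermediate overflow. The 0x7F mask keeps the bit shifted out of the neighbouring byte from leaking in. SWAR sketch (illustration only):

    #include <cstdint>

    // Four byte-wise halving adds in one 32-bit operation.
    uint32_t halving_add_u8x4(uint32_t x, uint32_t y) {
        const uint32_t carries = x & y;                      // weighted by 2 in x + y
        const uint32_t halves = ((x ^ y) >> 1) & 0x7F7F7F7F; // per-byte (x^y)>>1
        return carries + halves;                             // per-byte results never exceed 0xFF
    }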
@@ -318,11 +318,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
         Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-        code->movdqa(tmp, xmm_a);
+        code.movdqa(tmp, xmm_a);
-        code->pand(xmm_a, xmm_b);
+        code.pand(xmm_a, xmm_b);
-        code->pxor(tmp, xmm_b);
+        code.pxor(tmp, xmm_b);
-        code->psrlw(tmp, 1);
+        code.psrlw(tmp, 1);
-        code->paddw(xmm_a, tmp);
+        code.paddw(xmm_a, tmp);

         ctx.reg_alloc.DefineValue(inst, xmm_a);
     } else {

@@ -337,12 +337,12 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
         // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
         // We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.

-        code->mov(xor_a_b, reg_a);
+        code.mov(xor_a_b, reg_a);
-        code->and_(and_a_b, reg_b);
+        code.and_(and_a_b, reg_b);
-        code->xor_(xor_a_b, reg_b);
+        code.xor_(xor_a_b, reg_b);
-        code->shr(xor_a_b, 1);
+        code.shr(xor_a_b, 1);
-        code->and_(xor_a_b, 0x7FFF7FFF);
+        code.and_(xor_a_b, 0x7FFF7FFF);
-        code->add(result, xor_a_b);
+        code.add(result, xor_a_b);

         ctx.reg_alloc.DefineValue(inst, result);
     }

@@ -364,15 +364,15 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
     // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
     // carry propagates the sign bit from (x^y)>>1 upwards by one.

-    code->mov(xor_a_b, reg_a);
+    code.mov(xor_a_b, reg_a);
-    code->and_(and_a_b, reg_b);
+    code.and_(and_a_b, reg_b);
-    code->xor_(xor_a_b, reg_b);
+    code.xor_(xor_a_b, reg_b);
-    code->mov(carry, xor_a_b);
+    code.mov(carry, xor_a_b);
-    code->and_(carry, 0x80808080);
+    code.and_(carry, 0x80808080);
-    code->shr(xor_a_b, 1);
+    code.shr(xor_a_b, 1);
-    code->and_(xor_a_b, 0x7F7F7F7F);
+    code.and_(xor_a_b, 0x7F7F7F7F);
-    code->add(result, xor_a_b);
+    code.add(result, xor_a_b);
-    code->xor_(result, carry);
+    code.xor_(result, carry);

     ctx.reg_alloc.DefineValue(inst, result);
 }

@@ -389,11 +389,11 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
     // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>>1).
     // The arithmetic shift right makes this signed.

-    code->movdqa(tmp, xmm_a);
+    code.movdqa(tmp, xmm_a);
-    code->pand(xmm_a, xmm_b);
+    code.pand(xmm_a, xmm_b);
-    code->pxor(tmp, xmm_b);
+    code.pxor(tmp, xmm_b);
-    code->psraw(tmp, 1);
+    code.psraw(tmp, 1);
-    code->paddw(xmm_a, tmp);
+    code.paddw(xmm_a, tmp);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -408,9 +408,9 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
     // Note that x^y always contains the LSB of the result.
     // Since we want to calculate (x+y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

-    code->xor_(minuend, subtrahend);
+    code.xor_(minuend, subtrahend);
-    code->and_(subtrahend, minuend);
+    code.and_(subtrahend, minuend);
-    code->shr(minuend, 1);
+    code.shr(minuend, 1);

     // At this point,
     // minuend := (a^b) >> 1

@@ -420,9 +420,9 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
     // We can do this because minuend contains 7 bit fields.
     // We use the extra bit in minuend as a bit to borrow from; we set this bit.
     // We invert this bit at the end as this tells us if that bit was borrowed from.
-    code->or_(minuend, 0x80808080);
+    code.or_(minuend, 0x80808080);
-    code->sub(minuend, subtrahend);
+    code.sub(minuend, subtrahend);
-    code->xor_(minuend, 0x80808080);
+    code.xor_(minuend, 0x80808080);

     // minuend now contains the desired result.
     ctx.reg_alloc.DefineValue(inst, minuend);
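Note: the or/sub/xor trio above is a SWAR borrow guard. Setting bit 7 of every byte of the minuend gives each 7-bit field a private bit to borrow from, so one 32-bit sub behaves like four independent byte subtractions; XORing the guard back in restores the bit and, in the signed variants below, records whether a borrow happened. Sketch (illustration only):

    #include <cstdint>

    // Byte-wise a - b where both inputs hold 7-bit fields per byte.
    uint32_t swar_sub_u8x4(uint32_t minuend7, uint32_t subtrahend7) {
        const uint32_t r = (minuend7 | 0x80808080u) - subtrahend7; // borrows stop at the guard bits
        return r ^ 0x80808080u;                                    // guard still set => no borrow occurred
    }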
@@ -440,11 +440,11 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
     // Note that x^y always contains the LSB of the result.
     // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

-    code->xor_(minuend, subtrahend);
+    code.xor_(minuend, subtrahend);
-    code->and_(subtrahend, minuend);
+    code.and_(subtrahend, minuend);
-    code->mov(carry, minuend);
+    code.mov(carry, minuend);
-    code->and_(carry, 0x80808080);
+    code.and_(carry, 0x80808080);
-    code->shr(minuend, 1);
+    code.shr(minuend, 1);

     // At this point,
     // minuend := (a^b) >> 1

@@ -456,10 +456,10 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
     // We use the extra bit in minuend as a bit to borrow from; we set this bit.
     // We invert this bit at the end as this tells us if that bit was borrowed from.
     // We then sign extend the result into this bit.
-    code->or_(minuend, 0x80808080);
+    code.or_(minuend, 0x80808080);
-    code->sub(minuend, subtrahend);
+    code.sub(minuend, subtrahend);
-    code->xor_(minuend, 0x80808080);
+    code.xor_(minuend, 0x80808080);
-    code->xor_(minuend, carry);
+    code.xor_(minuend, carry);

     ctx.reg_alloc.DefineValue(inst, minuend);
 }

@@ -474,15 +474,15 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
     // Note that x^y always contains the LSB of the result.
     // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

-    code->pxor(minuend, subtrahend);
+    code.pxor(minuend, subtrahend);
-    code->pand(subtrahend, minuend);
+    code.pand(subtrahend, minuend);
-    code->psrlw(minuend, 1);
+    code.psrlw(minuend, 1);

     // At this point,
     // minuend := (a^b) >> 1
     // subtrahend := (a^b) & b

-    code->psubw(minuend, subtrahend);
+    code.psubw(minuend, subtrahend);

     ctx.reg_alloc.DefineValue(inst, minuend);
 }

@@ -497,20 +497,20 @@ void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
     // Note that x^y always contains the LSB of the result.
     // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>>1) - ((x^y)&y).

-    code->pxor(minuend, subtrahend);
+    code.pxor(minuend, subtrahend);
-    code->pand(subtrahend, minuend);
+    code.pand(subtrahend, minuend);
-    code->psraw(minuend, 1);
+    code.psraw(minuend, 1);

     // At this point,
     // minuend := (a^b) >>> 1
     // subtrahend := (a^b) & b

-    code->psubw(minuend, subtrahend);
+    code.psubw(minuend, subtrahend);

     ctx.reg_alloc.DefineValue(inst, minuend);
 }

-void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
+void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

@@ -521,25 +521,25 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
     Xbyak::Reg32 reg_sum, reg_diff;

     if (is_signed) {
-        code->movsx(reg_a_lo, reg_a_hi.cvt16());
+        code.movsx(reg_a_lo, reg_a_hi.cvt16());
-        code->movsx(reg_b_lo, reg_b_hi.cvt16());
+        code.movsx(reg_b_lo, reg_b_hi.cvt16());
-        code->sar(reg_a_hi, 16);
+        code.sar(reg_a_hi, 16);
-        code->sar(reg_b_hi, 16);
+        code.sar(reg_b_hi, 16);
     } else {
-        code->movzx(reg_a_lo, reg_a_hi.cvt16());
+        code.movzx(reg_a_lo, reg_a_hi.cvt16());
-        code->movzx(reg_b_lo, reg_b_hi.cvt16());
+        code.movzx(reg_b_lo, reg_b_hi.cvt16());
-        code->shr(reg_a_hi, 16);
+        code.shr(reg_a_hi, 16);
-        code->shr(reg_b_hi, 16);
+        code.shr(reg_b_hi, 16);
     }

     if (hi_is_sum) {
-        code->sub(reg_a_lo, reg_b_hi);
+        code.sub(reg_a_lo, reg_b_hi);
-        code->add(reg_a_hi, reg_b_lo);
+        code.add(reg_a_hi, reg_b_lo);
         reg_diff = reg_a_lo;
         reg_sum = reg_a_hi;
     } else {
-        code->add(reg_a_lo, reg_b_hi);
+        code.add(reg_a_lo, reg_b_hi);
-        code->sub(reg_a_hi, reg_b_lo);
+        code.sub(reg_a_hi, reg_b_lo);
         reg_diff = reg_a_hi;
         reg_sum = reg_a_lo;
     }

@@ -549,36 +549,36 @@ void EmitPackedSubAdd(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, bool
         Xbyak::Reg32 ge_sum = reg_b_hi;
         Xbyak::Reg32 ge_diff = reg_b_lo;

-        code->mov(ge_sum, reg_sum);
+        code.mov(ge_sum, reg_sum);
-        code->mov(ge_diff, reg_diff);
+        code.mov(ge_diff, reg_diff);

         if (!is_signed) {
-            code->shl(ge_sum, 15);
+            code.shl(ge_sum, 15);
-            code->sar(ge_sum, 31);
+            code.sar(ge_sum, 31);
         } else {
-            code->not_(ge_sum);
+            code.not_(ge_sum);
-            code->sar(ge_sum, 31);
+            code.sar(ge_sum, 31);
         }
-        code->not_(ge_diff);
+        code.not_(ge_diff);
-        code->sar(ge_diff, 31);
+        code.sar(ge_diff, 31);
-        code->and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
+        code.and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
-        code->and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
+        code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
-        code->or_(ge_sum, ge_diff);
+        code.or_(ge_sum, ge_diff);

         ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
         ctx.EraseInstruction(ge_inst);
     }

     if (is_halving) {
-        code->shl(reg_a_lo, 15);
+        code.shl(reg_a_lo, 15);
-        code->shr(reg_a_hi, 1);
+        code.shr(reg_a_hi, 1);
     } else {
-        code->shl(reg_a_lo, 16);
+        code.shl(reg_a_lo, 16);
     }

     // reg_a_lo now contains the low word and reg_a_hi now contains the high word.
     // Merge them.
-    code->shld(reg_a_hi, reg_a_lo, 16);
+    code.shld(reg_a_hi, reg_a_lo, 16);

     ctx.reg_alloc.DefineValue(inst, reg_a_hi);
 }
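Note: after the halving shifts, the two halfword results sit at the top of reg_a_lo and the bottom of reg_a_hi; shld packs them in a single instruction, since shld hi, lo, 16 computes hi = (hi << 16) | (lo >> 16). Sketch of the merge (names invented):

    #include <cstdint>

    uint32_t merge_halfwords(uint32_t reg_a_hi, uint32_t reg_a_lo) {
        return (reg_a_hi << 16) | (reg_a_lo >> 16); // shld reg_a_hi, reg_a_lo, 16
    }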
@@ -615,13 +615,13 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
     EmitPackedSubAdd(code, ctx, inst, false, true, true);
 }

-static void EmitPackedOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
+static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

-    (code->*fn)(xmm_a, xmm_b);
+    (code.*fn)(xmm_a, xmm_b);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
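Note: because code is now a reference, calls through the Xbyak pointer-to-member-function change from (code->*fn)(...) to (code.*fn)(...). Standalone illustration of the two spellings:

    struct Emitter {
        void op(int) {}
    };

    void invoke(Emitter& ref, Emitter* ptr, void (Emitter::*fn)(int)) {
        (ref.*fn)(0);  // through a reference, as this commit now does
        (ptr->*fn)(0); // through a pointer, as before
    }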
@@ -672,19 +672,19 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
         Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);

-        code->pand(from, ge);
+        code.pand(from, ge);
-        code->pandn(ge, to);
+        code.pandn(ge, to);
-        code->por(from, ge);
+        code.por(from, ge);

         ctx.reg_alloc.DefineValue(inst, from);
-    } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) {
+    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) {
         Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
         Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
         Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();

-        code->and_(from, ge);
+        code.and_(from, ge);
-        code->andn(to, ge, to);
+        code.andn(to, ge, to);
-        code->or_(from, to);
+        code.or_(from, to);

         ctx.reg_alloc.DefineValue(inst, from);
     } else {

@@ -692,10 +692,10 @@ void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
         Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();

-        code->and_(from, ge);
+        code.and_(from, ge);
-        code->not_(ge);
+        code.not_(ge);
-        code->and_(ge, to);
+        code.and_(ge, to);
-        code->or_(from, ge);
+        code.or_(from, ge);

         ctx.reg_alloc.DefineValue(inst, from);
     }
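Note: all three EmitPackedSelect paths compute the same blend, result = (from & ge) | (to & ~ge); pandn, BMI1's andn, and the plain not/and differ only in how ~ge is formed and which register gets clobbered. Scalar sketch of the selection (illustration only):

    #include <cstdint>

    uint32_t packed_select(uint32_t ge_mask, uint32_t to, uint32_t from) {
        return (from & ge_mask) | (to & ~ge_mask); // take 'from' where GE is set
    }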
@@ -26,15 +26,15 @@ void EmitX64::EmitSignedSaturatedAdd(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Reg32 addend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
     Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();

-    code->mov(overflow, result);
+    code.mov(overflow, result);
-    code->shr(overflow, 31);
+    code.shr(overflow, 31);
-    code->add(overflow, 0x7FFFFFFF);
+    code.add(overflow, 0x7FFFFFFF);
     // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
-    code->add(result, addend);
+    code.add(result, addend);
-    code->cmovo(result, overflow);
+    code.cmovo(result, overflow);

     if (overflow_inst) {
-        code->seto(overflow.cvt8());
+        code.seto(overflow.cvt8());

         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
         ctx.EraseInstruction(overflow_inst);
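Note: shr overflow, 31 followed by add overflow, 0x7FFFFFFF turns the sign of the first operand into the correct clamp value (INT_MAX for a non-negative input, INT_MIN for a negative one), and cmovo substitutes it only when the add actually overflowed. A portable sketch (uses the GCC/Clang builtin; illustration only):

    #include <cstdint>

    int32_t saturated_add(int32_t a, int32_t b, bool& overflow_out) {
        const int32_t clamp =
            static_cast<int32_t>((static_cast<uint32_t>(a) >> 31) + 0x7FFFFFFF);
        int32_t result;
        overflow_out = __builtin_add_overflow(a, b, &result); // the add; OF flag / seto
        return overflow_out ? clamp : result;                 // cmovo
    }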
@@ -52,15 +52,15 @@ void EmitX64::EmitSignedSaturatedSub(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Reg32 subend = ctx.reg_alloc.UseGpr(args[1]).cvt32();
     Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();

-    code->mov(overflow, result);
+    code.mov(overflow, result);
-    code->shr(overflow, 31);
+    code.shr(overflow, 31);
-    code->add(overflow, 0x7FFFFFFF);
+    code.add(overflow, 0x7FFFFFFF);
     // overflow now contains 0x7FFFFFFF if a was positive, or 0x80000000 if a was negative
-    code->sub(result, subend);
+    code.sub(result, subend);
-    code->cmovo(result, overflow);
+    code.cmovo(result, overflow);

     if (overflow_inst) {
-        code->seto(overflow.cvt8());
+        code.seto(overflow.cvt8());

         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
         ctx.EraseInstruction(overflow_inst);

@@ -83,14 +83,14 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();

     // Pseudocode: result = clamp(reg_a, 0, saturated_value);
-    code->xor_(overflow, overflow);
+    code.xor_(overflow, overflow);
-    code->cmp(reg_a, saturated_value);
+    code.cmp(reg_a, saturated_value);
-    code->mov(result, saturated_value);
+    code.mov(result, saturated_value);
-    code->cmovle(result, overflow);
+    code.cmovle(result, overflow);
-    code->cmovbe(result, reg_a);
+    code.cmovbe(result, reg_a);

     if (overflow_inst) {
-        code->seta(overflow.cvt8());
+        code.seta(overflow.cvt8());

         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
         ctx.EraseInstruction(overflow_inst);
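Note: the clamp above uses one cmp and two conditional moves against the same flags. cmovle (signed) zeroes the result for anything not above the bound, including negatives; cmovbe (unsigned) then restores in-range inputs, and a negative input fails it because it looks huge unsigned. Sketch, assuming the bound is below 0x80000000:

    #include <cstdint>

    uint32_t unsigned_saturate(int32_t x, uint32_t bound) {
        uint32_t result = bound;               // mov result, saturated_value
        if (x <= static_cast<int32_t>(bound))  // cmovle: signed compare
            result = 0;
        if (static_cast<uint32_t>(x) <= bound) // cmovbe: unsigned compare
            result = static_cast<uint32_t>(x);
        return result;                         // bound if too big, 0 if negative, else x
    }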
@@ -126,20 +126,20 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

     // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
-    code->lea(overflow, code->ptr[reg_a.cvt64() + negative_saturated_value]);
+    code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);

     // Put the appropriate saturated value in result
-    code->cmp(reg_a, positive_saturated_value);
+    code.cmp(reg_a, positive_saturated_value);
-    code->mov(tmp, positive_saturated_value);
+    code.mov(tmp, positive_saturated_value);
-    code->mov(result, sext_negative_satured_value);
+    code.mov(result, sext_negative_satured_value);
-    code->cmovg(result, tmp);
+    code.cmovg(result, tmp);

     // Do the saturation
-    code->cmp(overflow, mask);
+    code.cmp(overflow, mask);
-    code->cmovbe(result, reg_a);
+    code.cmovbe(result, reg_a);

     if (overflow_inst) {
-        code->seta(overflow.cvt8());
+        code.seta(overflow.cvt8());

         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
         ctx.EraseInstruction(overflow_inst);
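Note: the lea shifts the whole legal range down to start at zero, so a single unsigned compare against mask tests membership in the saturation range; cmovbe then keeps the original value only when it was in range. The underlying idiom (sketch):

    #include <cstdint>

    // x is in [min, min + mask] iff (uint32_t)(x - min) <= mask.
    bool in_range(int32_t x, int32_t min, uint32_t mask) {
        return static_cast<uint32_t>(x - min) <= mask;
    }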
@@ -17,13 +17,13 @@ namespace Dynarmic::BackendX64 {
 using namespace Xbyak::util;

 template <typename Function>
-static void EmitVectorOperation(BlockOfCode* code, EmitContext& ctx, IR::Inst* inst, Function fn) {
+static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

-    (code->*fn)(xmm_a, xmm_b);
+    (code.*fn)(xmm_a, xmm_b);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -35,15 +35,15 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
-        code->pextrb(dest, source, index);
+        code.pextrb(dest, source, index);
         ctx.reg_alloc.DefineValue(inst, dest);
     } else {
         Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
-        code->pextrw(dest, source, index / 2);
+        code.pextrw(dest, source, index / 2);
         if (index % 2 == 1) {
-            code->shr(dest, 8);
+            code.shr(dest, 8);
         }
         ctx.reg_alloc.DefineValue(inst, dest);
     }
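Note: pextrb is SSE4.1-only, so the fallback above extracts the containing 16-bit lane with the SSE2 pextrw and shifts the unwanted byte away. Scalar sketch of the fallback path (illustration only):

    #include <cstdint>

    uint8_t extract_byte(const uint16_t lanes[8], unsigned index) {
        uint32_t word = lanes[index / 2]; // pextrw dest, source, index / 2
        if (index % 2 == 1)
            word >>= 8;                   // shr dest, 8
        return static_cast<uint8_t>(word);
    }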
@@ -56,7 +56,7 @@ void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
     Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32();
-    code->pextrw(dest, source, index);
+    code.pextrw(dest, source, index);
     ctx.reg_alloc.DefineValue(inst, dest);
 }

@@ -69,14 +69,14 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) {

     if (index == 0) {
         Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
-        code->movd(dest, source);
+        code.movd(dest, source);
-    } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
-        code->pextrd(dest, source, index);
+        code.pextrd(dest, source, index);
     } else {
         Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]);
-        code->pshufd(source, source, index);
+        code.pshufd(source, source, index);
-        code->movd(dest, source);
+        code.movd(dest, source);
     }

     ctx.reg_alloc.DefineValue(inst, dest);

@@ -91,14 +91,14 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) {

     if (index == 0) {
         Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
-        code->movq(dest, source);
+        code.movq(dest, source);
-    } else if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    } else if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]);
-        code->pextrq(dest, source, 1);
+        code.pextrq(dest, source, 1);
     } else {
         Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]);
-        code->punpckhqdq(source, source);
+        code.punpckhqdq(source, source);
-        code->movq(dest, source);
+        code.movq(dest, source);
     }

     ctx.reg_alloc.DefineValue(inst, dest);

@@ -109,11 +109,11 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) {
     ASSERT(args[1].IsImmediate());
     u8 index = args[1].GetImmediateU8();

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt8();

-        code->pinsrb(source_vector, source_elem.cvt32(), index);
+        code.pinsrb(source_vector, source_elem.cvt32(), index);

         ctx.reg_alloc.DefineValue(inst, source_vector);
     } else {

@@ -121,17 +121,17 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
         Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

-        code->pextrw(tmp, source_vector, index / 2);
+        code.pextrw(tmp, source_vector, index / 2);
         if (index % 2 == 0) {
-            code->and_(tmp, 0xFF00);
+            code.and_(tmp, 0xFF00);
-            code->and_(source_elem, 0x00FF);
+            code.and_(source_elem, 0x00FF);
-            code->or_(tmp, source_elem);
+            code.or_(tmp, source_elem);
         } else {
-            code->and_(tmp, 0x00FF);
+            code.and_(tmp, 0x00FF);
-            code->shl(source_elem, 8);
+            code.shl(source_elem, 8);
-            code->or_(tmp, source_elem);
+            code.or_(tmp, source_elem);
         }
-        code->pinsrw(source_vector, tmp, index / 2);
+        code.pinsrw(source_vector, tmp, index / 2);

         ctx.reg_alloc.DefineValue(inst, source_vector);
     }
@@ -145,7 +145,7 @@ void EmitX64::EmitVectorSetElement16(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt16();

-    code->pinsrw(source_vector, source_elem.cvt32(), index);
+    code.pinsrw(source_vector, source_elem.cvt32(), index);

     ctx.reg_alloc.DefineValue(inst, source_vector);
 }

@@ -155,20 +155,20 @@ void EmitX64::EmitVectorSetElement32(EmitContext& ctx, IR::Inst* inst) {
     ASSERT(args[1].IsImmediate());
     u8 index = args[1].GetImmediateU8();

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Reg32 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt32();

-        code->pinsrd(source_vector, source_elem, index);
+        code.pinsrd(source_vector, source_elem, index);

         ctx.reg_alloc.DefineValue(inst, source_vector);
     } else {
         Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();

-        code->pinsrw(source_vector, source_elem, index * 2);
+        code.pinsrw(source_vector, source_elem, index * 2);
-        code->shr(source_elem, 16);
+        code.shr(source_elem, 16);
-        code->pinsrw(source_vector, source_elem, index * 2 + 1);
+        code.pinsrw(source_vector, source_elem, index * 2 + 1);

         ctx.reg_alloc.DefineValue(inst, source_vector);
     }

@@ -179,24 +179,24 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) {
     ASSERT(args[1].IsImmediate());
     u8 index = args[1].GetImmediateU8();

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(args[2]);

-        code->pinsrq(source_vector, source_elem, index);
+        code.pinsrq(source_vector, source_elem, index);

         ctx.reg_alloc.DefineValue(inst, source_vector);
     } else {
         Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Reg64 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]);

-        code->pinsrw(source_vector, source_elem.cvt32(), index * 4);
+        code.pinsrw(source_vector, source_elem.cvt32(), index * 4);
-        code->shr(source_elem, 16);
+        code.shr(source_elem, 16);
-        code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 1);
+        code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 1);
-        code->shr(source_elem, 16);
+        code.shr(source_elem, 16);
-        code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 2);
+        code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 2);
-        code->shr(source_elem, 16);
+        code.shr(source_elem, 16);
-        code->pinsrw(source_vector, source_elem.cvt32(), index * 4 + 3);
+        code.pinsrw(source_vector, source_elem.cvt32(), index * 4 + 3);

         ctx.reg_alloc.DefineValue(inst, source_vector);
     }
@@ -227,15 +227,15 @@ void EmitX64::EmitVectorLowerBroadcast8(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
         Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-        code->pxor(tmp, tmp);
+        code.pxor(tmp, tmp);
-        code->pshufb(a, tmp);
+        code.pshufb(a, tmp);
-        code->movq(a, a);
+        code.movq(a, a);
     } else {
-        code->punpcklbw(a, a);
+        code.punpcklbw(a, a);
-        code->pshuflw(a, a, 0);
+        code.pshuflw(a, a, 0);
     }

     ctx.reg_alloc.DefineValue(inst, a);

@@ -246,7 +246,7 @@ void EmitX64::EmitVectorLowerBroadcast16(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    code->pshuflw(a, a, 0);
+    code.pshuflw(a, a, 0);

     ctx.reg_alloc.DefineValue(inst, a);
 }

@@ -256,7 +256,7 @@ void EmitX64::EmitVectorLowerBroadcast32(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    code->pshuflw(a, a, 0b01000100);
+    code.pshuflw(a, a, 0b01000100);

     ctx.reg_alloc.DefineValue(inst, a);
 }

@@ -266,15 +266,15 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
         Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-        code->pxor(tmp, tmp);
+        code.pxor(tmp, tmp);
-        code->pshufb(a, tmp);
+        code.pshufb(a, tmp);
     } else {
-        code->punpcklbw(a, a);
+        code.punpcklbw(a, a);
-        code->pshuflw(a, a, 0);
+        code.pshuflw(a, a, 0);
-        code->punpcklqdq(a, a);
+        code.punpcklqdq(a, a);
     }

     ctx.reg_alloc.DefineValue(inst, a);

@@ -285,8 +285,8 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    code->pshuflw(a, a, 0);
+    code.pshuflw(a, a, 0);
-    code->punpcklqdq(a, a);
+    code.punpcklqdq(a, a);

     ctx.reg_alloc.DefineValue(inst, a);
 }

@@ -296,7 +296,7 @@ void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    code->pshufd(a, a, 0);
+    code.pshufd(a, a, 0);

     ctx.reg_alloc.DefineValue(inst, a);
 }

@@ -306,7 +306,7 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    code->punpcklqdq(a, a);
+    code.punpcklqdq(a, a);

     ctx.reg_alloc.DefineValue(inst, a);
 }
@@ -325,8 +325,8 @@ void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm();

-    code->pcmpeqw(xmm_b, xmm_b);
+    code.pcmpeqw(xmm_b, xmm_b);
-    code->pxor(xmm_a, xmm_b);
+    code.pxor(xmm_a, xmm_b);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -344,7 +344,7 @@ void EmitX64::EmitVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
 }

 void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pcmpeqq);
         return;
     }

@@ -355,9 +355,9 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
     Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-    code->pcmpeqd(xmm_a, xmm_b);
+    code.pcmpeqd(xmm_a, xmm_b);
-    code->pshufd(tmp, xmm_a, 0b10110001);
+    code.pshufd(tmp, xmm_a, 0b10110001);
-    code->pand(xmm_a, tmp);
+    code.pand(xmm_a, tmp);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }
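Note: without SSE4.1's pcmpeqq, the 64-bit lanes are compared as 32-bit halves (pcmpeqd), and pshufd swaps the half-results so that the pand leaves a lane all-ones only when both halves matched. Per-lane sketch (illustration only):

    #include <cstdint>

    uint64_t equal64_lane(uint64_t a, uint64_t b) {
        const bool lo = static_cast<uint32_t>(a) == static_cast<uint32_t>(b);
        const bool hi = (a >> 32) == (b >> 32);
        return (lo && hi) ? ~0ULL : 0; // pcmpeqd, then AND with the swapped halves
    }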
@@ -365,14 +365,14 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
         Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
         Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-        code->pcmpeqq(xmm_a, xmm_b);
+        code.pcmpeqq(xmm_a, xmm_b);
-        code->pshufd(tmp, xmm_a, 0b01001110);
+        code.pshufd(tmp, xmm_a, 0b01001110);
-        code->pand(xmm_a, tmp);
+        code.pand(xmm_a, tmp);

         ctx.reg_alloc.DefineValue(inst, xmm_a);
     } else {

@@ -380,11 +380,11 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
         Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-        code->pcmpeqd(xmm_a, xmm_b);
+        code.pcmpeqd(xmm_a, xmm_b);
-        code->pshufd(tmp, xmm_a, 0b10110001);
+        code.pshufd(tmp, xmm_a, 0b10110001);
-        code->pand(xmm_a, tmp);
+        code.pand(xmm_a, tmp);
-        code->pshufd(tmp, xmm_a, 0b01001110);
+        code.pshufd(tmp, xmm_a, 0b01001110);
-        code->pand(xmm_a, tmp);
+        code.pand(xmm_a, tmp);

         ctx.reg_alloc.DefineValue(inst, xmm_a);
     }

@@ -397,13 +397,13 @@ void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
     Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-    code->punpcklqdq(xmm_a, xmm_b);
+    code.punpcklqdq(xmm_a, xmm_b);
-    code->movdqa(tmp, xmm_a);
+    code.movdqa(tmp, xmm_a);
-    code->psllw(xmm_a, 8);
+    code.psllw(xmm_a, 8);
-    code->paddw(xmm_a, tmp);
+    code.paddw(xmm_a, tmp);
-    code->pxor(tmp, tmp);
+    code.pxor(tmp, tmp);
-    code->psrlw(xmm_a, 8);
+    code.psrlw(xmm_a, 8);
-    code->packuswb(xmm_a, tmp);
+    code.packuswb(xmm_a, tmp);

     ctx.reg_alloc.DefineValue(inst, xmm_a);
 }

@@ -415,17 +415,17 @@ void EmitX64::EmitVectorLowerPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
     Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-    code->punpcklqdq(xmm_a, xmm_b);
+    code.punpcklqdq(xmm_a, xmm_b);
-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
-        code->pxor(tmp, tmp);
+        code.pxor(tmp, tmp);
-        code->phaddw(xmm_a, tmp);
+        code.phaddw(xmm_a, tmp);
     } else {
-        code->movdqa(tmp, xmm_a);
+        code.movdqa(tmp, xmm_a);
-        code->pslld(xmm_a, 16);
+        code.pslld(xmm_a, 16);
-        code->paddd(xmm_a, tmp);
+        code.paddd(xmm_a, tmp);
-        code->pxor(tmp, tmp);
+        code.pxor(tmp, tmp);
-        code->psrad(xmm_a, 16);
+        code.psrad(xmm_a, 16);
-        code->packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above.
+        code.packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above.
     }

     ctx.reg_alloc.DefineValue(inst, xmm_a);
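Note: a lower paired add concatenates the two operands and sums adjacent element pairs into the low half of the result. phaddw does this directly on SSSE3; the SSE2 path shifts each dword left by 16 so paddd adds a halfword to its neighbour, then shifts back arithmetically so the SSE2 packssdw can repack (packusdw would need SSE4.1). Scalar statement of the semantics (sketch):

    #include <array>
    #include <cstdint>

    std::array<uint16_t, 4> lower_paired_add16(const uint16_t a[4], const uint16_t b[4]) {
        return { static_cast<uint16_t>(a[0] + a[1]), static_cast<uint16_t>(a[2] + a[3]),
                 static_cast<uint16_t>(b[0] + b[1]), static_cast<uint16_t>(b[2] + b[3]) };
    }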
@@ -438,16 +438,16 @@ void EmitX64::EmitVectorLowerPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
     Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

-    code->punpcklqdq(xmm_a, xmm_b);
+    code.punpcklqdq(xmm_a, xmm_b);
-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
-        code->pxor(tmp, tmp);
+        code.pxor(tmp, tmp);
-        code->phaddd(xmm_a, tmp);
+        code.phaddd(xmm_a, tmp);
     } else {
-        code->movdqa(tmp, xmm_a);
+        code.movdqa(tmp, xmm_a);
-        code->psllq(xmm_a, 32);
+        code.psllq(xmm_a, 32);
-        code->paddq(xmm_a, tmp);
+        code.paddq(xmm_a, tmp);
-        code->psrlq(xmm_a, 32);
+        code.psrlq(xmm_a, 32);
-        code->pshufd(xmm_a, xmm_a, 0b11011000);
+        code.pshufd(xmm_a, xmm_a, 0b11011000);
     }

     ctx.reg_alloc.DefineValue(inst, xmm_a);

@@ -461,15 +461,15 @@ void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
     Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();

-    code->movdqa(c, a);
+    code.movdqa(c, a);
-    code->movdqa(d, b);
+    code.movdqa(d, b);
-    code->psllw(a, 8);
+    code.psllw(a, 8);
-    code->psllw(b, 8);
+    code.psllw(b, 8);
-    code->paddw(a, c);
+    code.paddw(a, c);
-    code->paddw(b, d);
+    code.paddw(b, d);
-    code->psrlw(a, 8);
+    code.psrlw(a, 8);
-    code->psrlw(b, 8);
+    code.psrlw(b, 8);
-    code->packuswb(a, b);
+    code.packuswb(a, b);

     ctx.reg_alloc.DefineValue(inst, a);
 }
@@ -477,11 +477,11 @@ void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
         Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);

-        code->phaddw(a, b);
+        code.phaddw(a, b);

         ctx.reg_alloc.DefineValue(inst, a);
     } else {

@@ -490,15 +490,15 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();

-        code->movdqa(c, a);
+        code.movdqa(c, a);
-        code->movdqa(d, b);
+        code.movdqa(d, b);
-        code->pslld(a, 16);
+        code.pslld(a, 16);
-        code->pslld(b, 16);
+        code.pslld(b, 16);
-        code->paddd(a, c);
+        code.paddd(a, c);
-        code->paddd(b, d);
+        code.paddd(b, d);
-        code->psrad(a, 16);
+        code.psrad(a, 16);
-        code->psrad(b, 16);
+        code.psrad(b, 16);
-        code->packssdw(a, b);
+        code.packssdw(a, b);

         ctx.reg_alloc.DefineValue(inst, a);
     }

@@ -507,11 +507,11 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
 void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSSE3)) {
         Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
         Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);

-        code->phaddd(a, b);
+        code.phaddd(a, b);

         ctx.reg_alloc.DefineValue(inst, a);
     } else {

@@ -520,13 +520,13 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
         Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
         Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();

-        code->movdqa(c, a);
+        code.movdqa(c, a);
-        code->movdqa(d, b);
+        code.movdqa(d, b);
-        code->psllq(a, 32);
+        code.psllq(a, 32);
-        code->psllq(b, 32);
+        code.psllq(b, 32);
-        code->paddq(a, c);
+        code.paddq(a, c);
-        code->paddq(b, d);
+        code.paddq(b, d);
-        code->shufps(a, b, 0b11011101);
+        code.shufps(a, b, 0b11011101);

         ctx.reg_alloc.DefineValue(inst, a);
     }
@@ -539,10 +539,10 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
     Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
     Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();

-    code->movdqa(c, a);
+    code.movdqa(c, a);
-    code->punpcklqdq(a, b);
+    code.punpcklqdq(a, b);
-    code->punpckhqdq(c, b);
+    code.punpckhqdq(c, b);
-    code->paddq(a, c);
+    code.paddq(a, c);

     ctx.reg_alloc.DefineValue(inst, a);
 }

@@ -552,7 +552,7 @@ void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) {

     Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);

-    code->movq(a, a); // TODO: !IsLastUse
+    code.movq(a, a); // TODO: !IsLastUse

     ctx.reg_alloc.DefineValue(inst, a);
 }
@ -14,7 +14,7 @@ struct BlockOfCode::ExceptionHandler::Impl final {
|
||||||
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
|
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
|
||||||
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
|
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
|
||||||
|
|
||||||
void BlockOfCode::ExceptionHandler::Register(BlockOfCode*) {
|
void BlockOfCode::ExceptionHandler::Register(BlockOfCode&) {
|
||||||
// Do nothing
|
// Do nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -173,11 +173,11 @@ private:
|
||||||
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
|
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
|
||||||
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
|
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
|
||||||
|
|
||||||
void BlockOfCode::ExceptionHandler::Register(BlockOfCode* code) {
|
void BlockOfCode::ExceptionHandler::Register(BlockOfCode& code) {
|
||||||
const auto prolog_info = GetPrologueInformation();
|
const auto prolog_info = GetPrologueInformation();
|
||||||
|
|
||||||
code->align(16);
|
code.align(16);
|
||||||
UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code->AllocateFromCodeSpace(sizeof(UNWIND_INFO)));
|
UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO)));
|
||||||
unwind_info->Version = 1;
|
unwind_info->Version = 1;
|
||||||
unwind_info->Flags = 0; // No special exception handling required.
|
unwind_info->Flags = 0; // No special exception handling required.
|
||||||
unwind_info->SizeOfProlog = prolog_info.prolog_size;
|
unwind_info->SizeOfProlog = prolog_info.prolog_size;
|
||||||
|
@ -186,16 +186,16 @@ void BlockOfCode::ExceptionHandler::Register(BlockOfCode* code) {
|
||||||
unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0
|
unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0
|
||||||
// UNWIND_INFO::UnwindCode field:
|
// UNWIND_INFO::UnwindCode field:
|
||||||
const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size();
|
const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size();
|
||||||
UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code->AllocateFromCodeSpace(size_of_unwind_code));
|
UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code));
|
||||||
memcpy(unwind_code, prolog_info.unwind_code.data(), size_of_unwind_code);
|
memcpy(unwind_code, prolog_info.unwind_code.data(), size_of_unwind_code);
|
||||||
|
|
||||||
code->align(16);
|
code.align(16);
|
||||||
RUNTIME_FUNCTION* rfuncs = static_cast<RUNTIME_FUNCTION*>(code->AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION)));
|
RUNTIME_FUNCTION* rfuncs = static_cast<RUNTIME_FUNCTION*>(code.AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION)));
|
||||||
rfuncs->BeginAddress = static_cast<DWORD>(reinterpret_cast<u8*>(code->run_code) - code->getCode());
|
rfuncs->BeginAddress = static_cast<DWORD>(reinterpret_cast<u8*>(code.run_code) - code.getCode());
|
||||||
rfuncs->EndAddress = static_cast<DWORD>(code->maxSize_);
|
rfuncs->EndAddress = static_cast<DWORD>(code.maxSize_);
|
||||||
rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast<u8*>(unwind_info) - code->getCode());
|
rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast<u8*>(unwind_info) - code.getCode());
|
||||||
|
|
||||||
impl = std::make_unique<Impl>(rfuncs, code->getCode());
|
impl = std::make_unique<Impl>(rfuncs, code.getCode());
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace BackendX64
|
} // namespace BackendX64
|
||||||
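The signature change from `Register(BlockOfCode*)` to `Register(BlockOfCode&)` moves the validity guarantee to the caller: a reference cannot be null, so the handler body never has to consider that case. The registration call site is outside this excerpt; a hypothetical sketch of the pattern, with all names illustrative:

struct BlockOfCode; // forward declaration

struct ExceptionHandler {
    void Register(BlockOfCode& code); // was: void Register(BlockOfCode* code)
};

struct BlockOfCode {
    ExceptionHandler exception_handler;
    BlockOfCode() {
        exception_handler.Register(*this); // was: Register(this)
    }
};

void ExceptionHandler::Register(BlockOfCode&) {
    // Do nothing, mirroring the generic (non-Windows) handler above.
}

int main() {
    BlockOfCode code; // registration happens during construction
}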
 
@@ -370,10 +370,10 @@ void RegAlloc::HostCall(IR::Inst* result_def, boost::optional<Argument&> arg0, b
         Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
         switch (args[i]->GetType()) {
         case IR::Type::U8:
-            code->movzx(reg.cvt32(), reg.cvt8());
+            code.movzx(reg.cvt32(), reg.cvt8());
             break;
         case IR::Type::U16:
-            code->movzx(reg.cvt32(), reg.cvt16());
+            code.movzx(reg.cvt32(), reg.cvt16());
             break;
         default:
             break; // Nothing needs to be done
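The movzx calls above exist because a u8 or u16 value sitting in a 64-bit register can carry stale data in its upper bits, and host calling conventions generally expect narrow integer arguments without such garbage, so the emitter widens them explicitly before the call. A scalar model of that normalization, as a standalone sketch:

#include <cassert>
#include <cstdint>

// Equivalent of `movzx reg32, reg8` / `movzx reg32, reg16`: keep only
// the low bits of the register, zero everything above them.
std::uint64_t ZeroExtendU8(std::uint64_t reg)  { return static_cast<std::uint8_t>(reg); }
std::uint64_t ZeroExtendU16(std::uint64_t reg) { return static_cast<std::uint16_t>(reg); }

int main() {
    // Upper bits hold garbage; only the low byte is the real argument.
    const std::uint64_t reg = 0xDEADBEEFCAFE12FFull;
    assert(ZeroExtendU8(reg) == 0xFF);
    assert(ZeroExtendU16(reg) == 0x12FF);
}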
 
@@ -459,9 +459,9 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
     Xbyak::Reg64 reg = HostLocToReg64(host_loc);
     u64 imm_value = ImmediateToU64(imm);
     if (imm_value == 0)
-        code->xor_(reg.cvt32(), reg.cvt32());
+        code.xor_(reg.cvt32(), reg.cvt32());
     else
-        code->mov(reg, imm_value);
+        code.mov(reg, imm_value);
     return host_loc;
 }
 
@@ -469,9 +469,9 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
     Xbyak::Xmm reg = HostLocToXmm(host_loc);
     u64 imm_value = ImmediateToU64(imm);
     if (imm_value == 0)
-        code->pxor(reg, reg);
+        code.pxor(reg, reg);
     else
-        code->movdqa(reg, code->MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes
+        code.movdqa(reg, code.MConst(imm_value)); // TODO: movaps/movapd more appropriate sometimes
     return host_loc;
 }
 
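Both LoadImmediate overloads special-case zero with the usual x86-64 zeroing idioms: `xor reg, reg` and `pxor xmm, xmm` encode shorter than loading the constant and are recognized by the processor as dependency-breaking. The GPR path uses the 32-bit form (`reg.cvt32()`) because any write to a 32-bit register zero-extends into the full 64-bit register, saving a REX prefix. A sketch of that zero-extension rule:

#include <cassert>
#include <cstdint>

// Models the x86-64 rule behind `xor_(reg.cvt32(), reg.cvt32())`:
// producing a 32-bit result implicitly clears bits 63:32 of the register.
std::uint64_t Write32BitResult(std::uint32_t value) {
    return value; // zero-extended, exactly like a 32-bit register write
}

int main() {
    assert(Write32BitResult(0u) == 0u);                   // xor eax, eax zeroes all of rax
    assert(Write32BitResult(0x80000000u) == 0x80000000u); // zero-extension, not sign-extension
}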
 
@@ -557,42 +557,42 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
     const size_t bit_width = LocInfo(from).GetMaxBitWidth();
 
     if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
-        code->movaps(HostLocToXmm(to), HostLocToXmm(from));
+        code.movaps(HostLocToXmm(to), HostLocToXmm(from));
     } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(HostLocToReg64(to), HostLocToReg64(from));
+            code.mov(HostLocToReg64(to), HostLocToReg64(from));
         } else {
-            code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
+            code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
         }
     } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->movq(HostLocToXmm(to), HostLocToReg64(from));
+            code.movq(HostLocToXmm(to), HostLocToReg64(from));
         } else {
-            code->movd(HostLocToXmm(to), HostLocToReg64(from).cvt32());
+            code.movd(HostLocToXmm(to), HostLocToReg64(from).cvt32());
         }
     } else if (HostLocIsGPR(to) && HostLocIsXMM(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->movq(HostLocToReg64(to), HostLocToXmm(from));
+            code.movq(HostLocToReg64(to), HostLocToXmm(from));
         } else {
-            code->movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from));
+            code.movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from));
        }
     } else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
         Xbyak::Address spill_addr = spill_to_addr(from);
         ASSERT(spill_addr.getBit() >= bit_width);
         switch (bit_width) {
         case 128:
-            code->movaps(HostLocToXmm(to), spill_addr);
+            code.movaps(HostLocToXmm(to), spill_addr);
             break;
         case 64:
-            code->movsd(HostLocToXmm(to), spill_addr);
+            code.movsd(HostLocToXmm(to), spill_addr);
             break;
         case 32:
         case 16:
         case 8:
-            code->movss(HostLocToXmm(to), spill_addr);
+            code.movss(HostLocToXmm(to), spill_addr);
             break;
         default:
             UNREACHABLE();
@@ -602,15 +602,15 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
         ASSERT(spill_addr.getBit() >= bit_width);
         switch (bit_width) {
         case 128:
-            code->movaps(spill_addr, HostLocToXmm(from));
+            code.movaps(spill_addr, HostLocToXmm(from));
             break;
         case 64:
-            code->movsd(spill_addr, HostLocToXmm(from));
+            code.movsd(spill_addr, HostLocToXmm(from));
             break;
         case 32:
         case 16:
         case 8:
-            code->movss(spill_addr, HostLocToXmm(from));
+            code.movss(spill_addr, HostLocToXmm(from));
             break;
         default:
             UNREACHABLE();
@@ -618,16 +618,16 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
     } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(HostLocToReg64(to), spill_to_addr(from));
+            code.mov(HostLocToReg64(to), spill_to_addr(from));
         } else {
-            code->mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
+            code.mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
         }
     } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(spill_to_addr(to), HostLocToReg64(from));
+            code.mov(spill_to_addr(to), HostLocToReg64(from));
         } else {
-            code->mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
+            code.mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
         }
     } else {
         ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
@@ -636,7 +636,7 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
 
 void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
     if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
-        code->xchg(HostLocToReg64(a), HostLocToReg64(b));
+        code.xchg(HostLocToReg64(a), HostLocToReg64(b));
     } else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
         ASSERT_MSG(false, "Check your code: Exchanging XMM registers is unnecessary");
     } else {
 
@@ -91,7 +91,7 @@ private:
 
 class RegAlloc final {
 public:
-    explicit RegAlloc(BlockOfCode* code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
+    explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
         : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
 
     std::array<Argument, 3> GetArgumentInfo(IR::Inst* inst);
@@ -144,7 +144,7 @@ private:
     HostLocInfo& LocInfo(HostLoc loc);
     const HostLocInfo& LocInfo(HostLoc loc) const;
 
-    BlockOfCode* code = nullptr;
+    BlockOfCode& code;
    std::function<Xbyak::Address(HostLoc)> spill_to_addr;
     void EmitMove(HostLoc to, HostLoc from);
     void EmitExchange(HostLoc a, HostLoc b);
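A consequence of the last hunk worth noting: a reference member like `BlockOfCode& code;` must be bound in the constructor's member-initializer list, cannot be reseated afterwards, and makes the class non-copy-assignable; in exchange, no call site needs a null check. A minimal sketch, independent of the dynarmic sources:

#include <cassert>

struct BlockOfCode {
    int marker = 0;
};

class RegAllocLike { // illustrative stand-in for RegAlloc
public:
    // The reference can only be bound here; there is no null state.
    explicit RegAllocLike(BlockOfCode& code) : code(code) {}

    void Touch() { code.marker = 1; } // member access uses '.', not '->'

private:
    BlockOfCode& code; // was: BlockOfCode* code = nullptr;
};

int main() {
    BlockOfCode block;
    RegAllocLike alloc(block);
    alloc.Touch();
    assert(block.marker == 1);
}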