2016-07-01 14:01:06 +01:00
|
|
|
/* This file is part of the dynarmic project.
|
|
|
|
* Copyright (c) 2016 MerryMage
|
|
|
|
* This software may be used and distributed according to the terms of the GNU
|
|
|
|
* General Public License version 2 or any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
#include "backend_x64/abi.h"
|
2016-12-05 04:14:58 +00:00
|
|
|
#include "backend_x64/block_of_code.h"
|
2016-07-01 14:01:06 +01:00
|
|
|
#include "backend_x64/emit_x64.h"
|
2016-08-24 20:07:08 +01:00
|
|
|
#include "backend_x64/jitstate.h"
|
2016-12-05 04:11:34 +00:00
|
|
|
#include "common/assert.h"
|
|
|
|
#include "common/bit_util.h"
|
2016-09-06 00:52:33 +01:00
|
|
|
#include "frontend/arm/types.h"
|
2016-09-03 21:48:03 +01:00
|
|
|
#include "frontend/ir/basic_block.h"
|
2016-09-05 11:54:09 +01:00
|
|
|
#include "frontend/ir/location_descriptor.h"
|
2016-09-03 21:48:03 +01:00
|
|
|
#include "frontend/ir/microinstruction.h"
|
2016-12-05 04:11:34 +00:00
|
|
|
#include "frontend/ir/opcodes.h"
|
2016-07-01 14:01:06 +01:00
|
|
|
|
|
|
|
// TODO: Keep ARM flags in host flags so that they do not occupy GPRs unless necessary.
|
|
|
|
// TODO: Actually implement that proper instruction selector you've always wanted to, sweetheart.
|
|
|
|
|
|
|
|
namespace Dynarmic {
|
|
|
|
namespace BackendX64 {
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Builds the 32-bit memory operand for guest register `reg`, addressed
// relative to r15 at JitState::Reg[reg].
static Xbyak::Address MJitStateReg(Arm::Reg reg) {
    using namespace Xbyak::util;

    const size_t reg_index = static_cast<size_t>(reg);
    return dword[r15 + offsetof(JitState, Reg) + sizeof(u32) * reg_index];
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Builds the memory operand for extension register `reg`, addressed relative
// to r15 inside JitState::ExtReg.
//   - single registers: 32-bit operand, indexed from S0 in u32-sized steps
//   - double registers: 64-bit operand, indexed from D0 in u64-sized steps
// Any other register kind trips the assertion.
static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) {
    using namespace Xbyak::util;

    const size_t raw_index = static_cast<size_t>(reg);

    if (Arm::IsSingleExtReg(reg)) {
        const size_t slot = raw_index - static_cast<size_t>(Arm::ExtReg::S0);
        return dword[r15 + offsetof(JitState, ExtReg) + sizeof(u32) * slot];
    }

    if (Arm::IsDoubleExtReg(reg)) {
        const size_t slot = raw_index - static_cast<size_t>(Arm::ExtReg::D0);
        return qword[r15 + offsetof(JitState, ExtReg) + sizeof(u64) * slot];
    }

    ASSERT_MSG(false, "Should never happen.");
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Builds the 32-bit memory operand for JitState::Cpsr, addressed relative to r15.
static Xbyak::Address MJitStateCpsr() {
    using namespace Xbyak::util;

    const size_t cpsr_offset = offsetof(JitState, Cpsr);
    return dword[r15 + cpsr_offset];
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Removes `inst` from the instruction list of `block`.
static void EraseInstruction(IR::Block& block, IR::Inst* inst) {
    auto&& instruction_list = block.Instructions();
    instruction_list.erase(inst);
}
|
|
|
|
|
2016-12-05 04:22:56 +00:00
|
|
|
// Constructs an emitter that writes into `code`. The register allocator is
// bound to the same code buffer; the user callbacks and the JIT interface
// pointer are stored as given.
EmitX64::EmitX64(BlockOfCode* code, UserCallbacks cb, Jit* jit_interface)
        : reg_alloc(code)
        , code(code)
        , cb(cb)
        , jit_interface(jit_interface) {}
|
|
|
|
|
2016-08-26 19:14:25 +01:00
|
|
|
// Emits host code for one IR basic block and returns its descriptor.
//
// Steps, in order:
//   1. Reset the register allocator and align the code buffer.
//   2. Record the entry address in block_descriptors under the block's unique hash.
//   3. Emit the condition prelude, then dispatch every IR instruction to its
//      Emit* member function (the dispatch table is generated from opcodes.inc).
//   4. Emit cycle accounting and the block terminal, followed by an int3.
//   5. Patch existing references to this location and record the emitted size.
EmitX64::BlockDescriptor EmitX64::Emit(IR::Block& block) {
    const IR::LocationDescriptor location = block.Location();

    reg_alloc.Reset();

    code->align();
    const CodePtr entry_point = code->getCurr();
    EmitX64::BlockDescriptor& desc = block_descriptors[location.UniqueHash()];
    desc.code_ptr = entry_point;

    EmitCondPrelude(block);

    // Emit* functions may erase *later* instructions from the block, so
    // block.end() is re-evaluated on every iteration.
    auto cursor = block.begin();
    while (cursor != block.end()) {
        IR::Inst* current = &*cursor;

        // Call the relevant Emit* member function.
        switch (current->GetOpcode()) {

#define OPCODE(name, type, ...)                     \
        case IR::Opcode::name:                      \
            EmitX64::Emit##name(block, current);    \
            break;
#include "frontend/ir/opcodes.inc"
#undef OPCODE

        default:
            ASSERT_MSG(false, "Invalid opcode %zu", static_cast<size_t>(current->GetOpcode()));
            break;
        }

        reg_alloc.EndOfAllocScope();
        ++cursor;
    }

    EmitAddCycles(block.CycleCount());
    EmitTerminal(block.GetTerminal(), block.Location());
    code->int3();

    reg_alloc.AssertNoMoreUses();

    Patch(location, entry_point);

    const std::intptr_t block_end = reinterpret_cast<std::intptr_t>(code->getCurr());
    const std::intptr_t block_begin = reinterpret_cast<std::intptr_t>(entry_point);
    desc.size = block_end - block_begin;
    return desc;
}
|
|
|
|
|
2016-12-05 04:27:05 +00:00
|
|
|
// Looks up the descriptor of an already-emitted block by the unique hash of
// `descriptor`. Returns boost::none when no such block has been recorded.
boost::optional<EmitX64::BlockDescriptor> EmitX64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
    const auto found = block_descriptors.find(descriptor.UniqueHash());
    if (found != block_descriptors.end()) {
        return boost::make_optional<BlockDescriptor>(found->second);
    }
    return boost::none;
}
|
|
|
|
|
2016-08-05 14:07:27 +01:00
|
|
|
// Breakpoint: emits a single host int3 instruction.
void EmitX64::EmitBreakpoint(IR::Block& /*block*/, IR::Inst* /*inst*/) {
    code->int3();
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Identity: emits no host code. For a non-immediate argument the allocator is
// told (via RegisterAddDef) that `inst` is defined by that argument; an
// immediate argument requires nothing at all.
void EmitX64::EmitIdentity(IR::Block&, IR::Inst* inst) {
    if (inst->GetArg(0).IsImmediate()) {
        return;
    }
    reg_alloc.RegisterAddDef(inst, inst->GetArg(0));
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Loads the guest register named by argument 0 from the JIT state into a
// freshly defined 32-bit host GPR.
void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) {
    const Arm::Reg guest_reg = inst->GetArg(0).GetRegRef();

    Xbyak::Reg32 dest = reg_alloc.DefGpr(inst).cvt32();
    code->mov(dest, MJitStateReg(guest_reg));
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Loads a single-precision extension register from the JIT state into a
// freshly defined XMM register (movss). The register must be a single ExtReg.
void EmitX64::EmitGetExtendedRegister32(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsSingleExtReg(ext_reg));

    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    code->movss(dest, MJitStateExtReg(ext_reg));
}
|
|
|
|
|
|
|
|
// Loads a double-precision extension register from the JIT state into a
// freshly defined XMM register (movsd). The register must be a double ExtReg.
void EmitX64::EmitGetExtendedRegister64(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsDoubleExtReg(ext_reg));

    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    code->movsd(dest, MJitStateExtReg(ext_reg));
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Stores argument 1 into the guest register named by argument 0.
// An immediate is written directly as a 32-bit constant; any other value is
// first obtained in a host GPR.
void EmitX64::EmitSetRegister(IR::Block&, IR::Inst* inst) {
    const Arm::Reg guest_reg = inst->GetArg(0).GetRegRef();
    IR::Value value = inst->GetArg(1);

    if (!value.IsImmediate()) {
        Xbyak::Reg32 source = reg_alloc.UseGpr(value).cvt32();
        code->mov(MJitStateReg(guest_reg), source);
        return;
    }

    code->mov(MJitStateReg(guest_reg), value.GetU32());
}
|
|
|
|
|
2016-08-05 18:54:19 +01:00
|
|
|
// Stores argument 1 (held in an XMM register) into a single-precision
// extension register in the JIT state (movss).
void EmitX64::EmitSetExtendedRegister32(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsSingleExtReg(ext_reg));

    Xbyak::Xmm value = reg_alloc.UseXmm(inst->GetArg(1));
    code->movss(MJitStateExtReg(ext_reg), value);
}
|
|
|
|
|
|
|
|
// Stores argument 1 (held in an XMM register) into a double-precision
// extension register in the JIT state (movsd).
void EmitX64::EmitSetExtendedRegister64(IR::Block&, IR::Inst* inst) {
    const Arm::ExtReg ext_reg = inst->GetArg(0).GetExtRegRef();
    ASSERT(Arm::IsDoubleExtReg(ext_reg));

    Xbyak::Xmm value = reg_alloc.UseXmm(inst->GetArg(1));
    code->movsd(MJitStateExtReg(ext_reg), value);
}
|
|
|
|
|
|
|
|
// Loads the whole guest CPSR from the JIT state into a freshly defined 32-bit GPR.
void EmitX64::EmitGetCpsr(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 dest = reg_alloc.DefGpr(inst).cvt32();
    code->mov(dest, MJitStateCpsr());
}
|
|
|
|
|
|
|
|
// Overwrites the whole guest CPSR in the JIT state with argument 0.
void EmitX64::EmitSetCpsr(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 new_cpsr = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();
    code->mov(MJitStateCpsr(), new_cpsr);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Reads the N flag: loads the CPSR and shifts bit 31 down to bit 0.
// No mask is needed because bit 31 is the top bit of the 32-bit value.
void EmitX64::EmitGetNFlag(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, 31);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Writes the N flag (CPSR bit 31) from argument 0.
void EmitX64::EmitSetNFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t n_shift = 31;
    constexpr u32 n_mask = 1u << n_shift;

    IR::Value value = inst->GetArg(0);

    if (!value.IsImmediate()) {
        // Runtime value: move it into position, clear the old bit, merge the new one.
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value).cvt32();

        code->shl(shifted, n_shift);
        code->and_(MJitStateCpsr(), ~n_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    // Known constant: one read-modify-write on the CPSR is enough.
    if (value.GetU1()) {
        code->or_(MJitStateCpsr(), n_mask);
    } else {
        code->and_(MJitStateCpsr(), ~n_mask);
    }
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Reads the Z flag: loads the CPSR, shifts bit 30 down to bit 0 and masks it.
void EmitX64::EmitGetZFlag(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, 30);
    code->and_(flag, 1);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Writes the Z flag (CPSR bit 30) from argument 0.
void EmitX64::EmitSetZFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t z_shift = 30;
    constexpr u32 z_mask = 1u << z_shift;

    IR::Value value = inst->GetArg(0);

    if (!value.IsImmediate()) {
        // Runtime value: move it into position, clear the old bit, merge the new one.
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value).cvt32();

        code->shl(shifted, z_shift);
        code->and_(MJitStateCpsr(), ~z_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    // Known constant: one read-modify-write on the CPSR is enough.
    if (value.GetU1()) {
        code->or_(MJitStateCpsr(), z_mask);
    } else {
        code->and_(MJitStateCpsr(), ~z_mask);
    }
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Reads the C flag: loads the CPSR, shifts bit 29 down to bit 0 and masks it.
void EmitX64::EmitGetCFlag(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, 29);
    code->and_(flag, 1);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Writes the C flag (CPSR bit 29) from argument 0.
void EmitX64::EmitSetCFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t c_shift = 29;
    constexpr u32 c_mask = 1u << c_shift;

    IR::Value value = inst->GetArg(0);

    if (!value.IsImmediate()) {
        // Runtime value: move it into position, clear the old bit, merge the new one.
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value).cvt32();

        code->shl(shifted, c_shift);
        code->and_(MJitStateCpsr(), ~c_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    // Known constant: one read-modify-write on the CPSR is enough.
    if (value.GetU1()) {
        code->or_(MJitStateCpsr(), c_mask);
    } else {
        code->and_(MJitStateCpsr(), ~c_mask);
    }
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Reads the V flag: loads the CPSR, shifts bit 28 down to bit 0 and masks it.
void EmitX64::EmitGetVFlag(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 flag = reg_alloc.DefGpr(inst).cvt32();
    code->mov(flag, MJitStateCpsr());
    code->shr(flag, 28);
    code->and_(flag, 1);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Writes the V flag (CPSR bit 28) from argument 0.
void EmitX64::EmitSetVFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t v_shift = 28;
    constexpr u32 v_mask = 1u << v_shift;

    IR::Value value = inst->GetArg(0);

    if (!value.IsImmediate()) {
        // Runtime value: move it into position, clear the old bit, merge the new one.
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value).cvt32();

        code->shl(shifted, v_shift);
        code->and_(MJitStateCpsr(), ~v_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    // Known constant: one read-modify-write on the CPSR is enough.
    if (value.GetU1()) {
        code->or_(MJitStateCpsr(), v_mask);
    } else {
        code->and_(MJitStateCpsr(), ~v_mask);
    }
}
|
|
|
|
|
2016-08-06 22:04:52 +01:00
|
|
|
// ORs argument 0 into the Q flag (CPSR bit 27). The flag is only ever set
// here, never cleared: an immediate false emits no code at all.
void EmitX64::EmitOrQFlag(IR::Block&, IR::Inst* inst) {
    constexpr size_t q_shift = 27;
    constexpr u32 q_mask = 1u << q_shift;

    IR::Value value = inst->GetArg(0);

    if (!value.IsImmediate()) {
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value).cvt32();

        code->shl(shifted, q_shift);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    if (value.GetU1()) {
        code->or_(MJitStateCpsr(), q_mask);
    }
}
|
|
|
|
|
2016-11-23 19:44:27 +00:00
|
|
|
// Reads the four GE flags: loads the CPSR, shifts bits 19:16 down to 3:0 and
// masks off everything else.
void EmitX64::EmitGetGEFlags(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 ge = reg_alloc.DefGpr(inst).cvt32();
    code->mov(ge, MJitStateCpsr());
    code->shr(ge, 16);
    code->and_(ge, 0xF);
}
|
|
|
|
|
|
|
|
// Writes the four GE flags (CPSR bits 19:16) from the low bits of argument 0.
// Bits of the argument outside the 4-bit field are masked away in both paths.
void EmitX64::EmitSetGEFlags(IR::Block&, IR::Inst* inst) {
    constexpr size_t ge_shift = 16;
    constexpr u32 ge_mask = 0xFu << ge_shift;

    IR::Value value = inst->GetArg(0);

    if (!value.IsImmediate()) {
        Xbyak::Reg32 shifted = reg_alloc.UseScratchGpr(value).cvt32();

        code->shl(shifted, ge_shift);
        code->and_(shifted, ge_mask);
        code->and_(MJitStateCpsr(), ~ge_mask);
        code->or_(MJitStateCpsr(), shifted);
        return;
    }

    // Known constant: pre-compute the positioned field at emit time.
    u32 positioned = (value.GetU32() << ge_shift) & ge_mask;
    code->and_(MJitStateCpsr(), ~ge_mask);
    code->or_(MJitStateCpsr(), positioned);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits an interworking branch write to the guest PC.
//
// Pseudocode:
//   if (new_pc & 1) {
//       new_pc &= 0xFFFFFFFE;
//       cpsr.T = true;
//   } else {
//       new_pc &= 0xFFFFFFFC;
//       cpsr.T = false;
//   }
//
// Argument 0 is the branch target. When it is an immediate, the decision is
// made at emit time; otherwise a branch-free sequence is emitted.
void EmitX64::EmitBXWritePC(IR::Block&, IR::Inst* inst) {
    const u32 T_bit = 1 << 5;
    auto arg = inst->GetArg(0);

    if (arg.IsImmediate()) {
        u32 new_pc = arg.GetU32();
        if (Common::Bit<0>(new_pc)) {
            new_pc &= 0xFFFFFFFE;
            code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
            code->or_(MJitStateCpsr(), T_bit);
        } else {
            new_pc &= 0xFFFFFFFC;
            code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
            code->and_(MJitStateCpsr(), ~T_bit);
        }
        return;
    }

    using Xbyak::util::ptr;

    // CPSR and PC are 32-bit fields of the JIT state. All loads and stores
    // below therefore use 32-bit register views, as every other accessor in
    // this file does; a 64-bit operand would make the access 8 bytes wide and
    // touch the bytes adjacent to the field.
    Xbyak::Reg64 new_pc64 = reg_alloc.UseScratchGpr(arg);
    Xbyak::Reg32 new_pc = new_pc64.cvt32();
    Xbyak::Reg32 tmp1 = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 tmp2 = reg_alloc.ScratchGpr().cvt32();

    code->mov(tmp1, MJitStateCpsr());
    code->mov(tmp2, tmp1);
    code->and_(tmp2, u32(~T_bit));         // CPSR.T = 0
    code->or_(tmp1, u32(T_bit));           // CPSR.T = 1
    code->test(new_pc, u32(1));
    code->cmove(tmp1, tmp2);               // CPSR.T = pc & 1
    code->mov(MJitStateCpsr(), tmp1);
    // The address is formed from the 64-bit view; only the low 32 bits of the
    // sum are kept, and those depend solely on the low 32 bits of new_pc.
    code->lea(tmp2, ptr[new_pc64 + new_pc64 * 1]);
    code->or_(tmp2, u32(0xFFFFFFFC));      // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
    code->and_(new_pc, tmp2);
    code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a call to the user's CallSVC callback, passing argument 0 through
// the register allocator's host-call setup. The call is bracketed by
// SwitchMxcsrOnExit / SwitchMxcsrOnEntry.
void EmitX64::EmitCallSupervisor(IR::Block&, IR::Inst* inst) {
    auto svc_number = inst->GetArg(0);

    // No IR result is defined by this call.
    reg_alloc.HostCall(nullptr, svc_number);

    code->SwitchMxcsrOnExit();
    code->CallFunction(cb.CallSVC);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
2016-08-26 22:47:54 +01:00
|
|
|
// Host-side helper called from emitted code: returns the FPSCR value
// reported by `jit_state`.
static u32 GetFpscrImpl(JitState* jit_state) {
    const u32 fpscr = jit_state->Fpscr();
    return fpscr;
}
|
|
|
|
|
|
|
|
// Emits a host call to GetFpscrImpl with the JIT state pointer (r15) as its
// only parameter.
// NOTE(review): HostCall(inst) presumably binds the call's return value to
// `inst` - confirm against the register allocator.
void EmitX64::EmitGetFpscr(IR::Block&, IR::Inst* inst) {
    reg_alloc.HostCall(inst);

    // First parameter: JitState*.
    code->mov(code->ABI_PARAM1, code->r15);

    code->SwitchMxcsrOnExit();
    code->CallFunction(&GetFpscrImpl);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
|
|
|
// Host-side helper called from emitted code: forwards `value` to
// JitState::SetFpscr on `jit_state`.
static void SetFpscrImpl(u32 value, JitState* jit_state) {
    JitState& state = *jit_state;
    state.SetFpscr(value);
}
|
|
|
|
|
|
|
|
// Emits a host call to SetFpscrImpl(value, jit_state). Argument 0 is handed
// to the register allocator's host-call setup; the JIT state pointer (r15)
// is placed in the second parameter register.
void EmitX64::EmitSetFpscr(IR::Block&, IR::Inst* inst) {
    auto new_fpscr = inst->GetArg(0);

    // No IR result is defined by this call.
    reg_alloc.HostCall(nullptr, new_fpscr);

    // Second parameter: JitState*.
    code->mov(code->ABI_PARAM2, code->r15);

    code->SwitchMxcsrOnExit();
    code->CallFunction(&SetFpscrImpl);
    code->SwitchMxcsrOnEntry();
}
|
|
|
|
|
|
|
|
// Loads JitState::FPSCR_nzcv into a freshly defined 32-bit GPR.
void EmitX64::EmitGetFpscrNZCV(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    Xbyak::Reg32 nzcv = reg_alloc.DefGpr(inst).cvt32();
    code->mov(nzcv, dword[r15 + offsetof(JitState, FPSCR_nzcv)]);
}
|
|
|
|
|
|
|
|
// Stores argument 0 into JitState::FPSCR_nzcv as a 32-bit value.
void EmitX64::EmitSetFpscrNZCV(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    Xbyak::Reg32 nzcv = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();
    code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], nzcv);
}
|
|
|
|
|
2016-08-13 00:10:23 +01:00
|
|
|
// Emits code that pushes a (location hash, code pointer) pair onto the
// return stack buffer held in the JIT state.
//
// Argument 0 must be an immediate: the unique hash of the target location.
// If that location has already been emitted its code pointer is used;
// otherwise the return-from-run-code address is used, and the emitted
// "mov rcx" is recorded in patch_information under the target hash.
//
// The emitted code first scans every RSB slot and skips the push when the
// hash is already present in one of them.
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    ASSERT(inst->GetArg(0).IsImmediate());
    u64 target_hash = inst->GetArg(0).GetU64();

    auto known_block = block_descriptors.find(target_hash);
    CodePtr target_code_ptr = known_block != block_descriptors.end()
                            ? known_block->second.code_ptr
                            : code->GetReturnFromRunCodeAddress();

    // The code pointer lives in RCX because the patchable mov targets RCX.
    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
    Xbyak::Reg64 hash_reg = reg_alloc.ScratchGpr();
    Xbyak::Reg32 slot_reg = reg_alloc.ScratchGpr().cvt32();

    // slot = (rsb_ptr + 1) masked with RSBSize - 1
    code->mov(slot_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
    code->add(slot_reg, 1);
    code->and_(slot_reg, u32(JitState::RSBSize - 1));

    code->mov(hash_reg, target_hash);

    // Remember where the patchable mov starts, then emit it.
    patch_information[target_hash].mov_rcx.emplace_back(code->getCurr());
    EmitPatchMovRcx(target_code_ptr);

    // Skip the push if any slot already holds this hash.
    Xbyak::Label already_present;
    for (size_t slot = 0; slot < JitState::RSBSize; ++slot) {
        code->cmp(hash_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + slot * sizeof(u64)]);
        code->je(already_present, code->T_SHORT);
    }

    code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], slot_reg);
    code->mov(qword[r15 + slot_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], hash_reg);
    code->mov(qword[r15 + slot_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
    code->L(already_present);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// GetCarryFromOp is a pseudo-operation: the emitters in this file that look
// it up erase it from the block and define its result themselves, so this
// emitter must never be reached.
void EmitX64::EmitGetCarryFromOp(IR::Block& /*block*/, IR::Inst* /*inst*/) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// This emitter is never expected to be dispatched to; reaching it is an
// assertion failure.
void EmitX64::EmitGetOverflowFromOp(IR::Block& /*block*/, IR::Inst* /*inst*/) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2016-12-04 20:52:06 +00:00
|
|
|
// This emitter is never expected to be dispatched to; reaching it is an
// assertion failure.
void EmitX64::EmitGetGEFromOp(IR::Block& /*block*/, IR::Inst* /*inst*/) {
    ASSERT_MSG(false, "should never happen");
}
|
|
|
|
|
2016-08-04 22:04:42 +01:00
|
|
|
// Packs two 32-bit values into one 64-bit result:
//   result = (arg1 << 32) | zero_extend(arg0)
void EmitX64::EmitPack2x32To1x64(IR::Block&, IR::Inst* inst) {
    OpArg low_word;
    Xbyak::Reg64 packed;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(low_word, packed) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        packed = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        low_word = packed.cvt32();
    }
    low_word.setBit(32);

    Xbyak::Reg64 high_word = reg_alloc.UseScratchGpr(inst->GetArg(1));

    code->shl(high_word, 32);
    code->mov(packed.cvt32(), *low_word); // Zero extend to 64-bits
    code->or_(packed, high_word);
}
|
|
|
|
|
|
|
|
// Least significant word: emits no host code; `inst` is registered as an
// additional definition of its argument.
void EmitX64::EmitLeastSignificantWord(IR::Block&, IR::Inst* inst) {
    auto source = inst->GetArg(0);
    reg_alloc.RegisterAddDef(inst, source);
}
|
|
|
|
|
2016-08-06 21:03:57 +01:00
|
|
|
// Most significant word: shifts the 64-bit argument right by 32.
//
// If a GetCarryFromOp pseudo-operation is attached, it is removed from the
// block and its result is materialised from the host carry flag left by the
// shift (the last bit shifted out, i.e. bit 31 of the input).
// NOTE(review): this relies on DefGpr not disturbing the host flags between
// the shr and the setc - confirm against the register allocator.
void EmitX64::EmitMostSignificantWord(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    Xbyak::Reg64 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst);

    code->shr(value, 32);

    if (!carry_inst) {
        return;
    }

    EraseInstruction(block, carry_inst);
    inst->DecrementRemainingUses();
    Xbyak::Reg64 carry_out = reg_alloc.DefGpr(carry_inst);

    code->setc(carry_out.cvt8());
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Least significant halfword: emits no host code; `inst` is registered as an
// additional definition of its argument.
void EmitX64::EmitLeastSignificantHalf(IR::Block&, IR::Inst* inst) {
    auto source = inst->GetArg(0);
    reg_alloc.RegisterAddDef(inst, source);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Least significant byte: emits no host code; `inst` is registered as an
// additional definition of its argument.
void EmitX64::EmitLeastSignificantByte(IR::Block&, IR::Inst* inst) {
    auto source = inst->GetArg(0);
    reg_alloc.RegisterAddDef(inst, source);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Most significant bit: shifts bit 31 of the 32-bit argument down to bit 0.
void EmitX64::EmitMostSignificantBit(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

    // TODO: Flag optimization

    code->shr(value, 31);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Produces 1 if the 32-bit argument is zero and 0 otherwise:
// test sets ZF, sete captures it in the low byte, movzx clears the rest.
void EmitX64::EmitIsZero(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

    // TODO: Flag optimization

    code->test(value, value);
    code->sete(value.cvt8());
    code->movzx(value, value.cvt8());
}
|
|
|
|
|
2016-08-04 22:04:42 +01:00
|
|
|
// Produces 1 if the 64-bit argument is zero and 0 otherwise:
// test sets ZF, sete captures it in the low byte, movzx clears the rest.
void EmitX64::EmitIsZero64(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg64 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst);

    // TODO: Flag optimization

    code->test(value, value);
    code->sete(value.cvt8());
    code->movzx(value, value.cvt8());
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 32-bit logical shift left.
//   arg0: value to shift
//   arg1: shift amount (immediate, or a runtime byte forced into CL)
//   arg2: carry-in
//
// Two variants are emitted depending on whether a GetCarryFromOp
// pseudo-operation is attached to `inst`:
//   - without it, only the shifted value is produced and the carry-in is unused;
//   - with it, the pseudo-operation is erased from the block and its result
//     (the carry-out) is produced here as well.
//
// The 32-bit x64 SHL instruction masks the shift count by 0x1F before
// performing the shift. ARM differs from that behaviour: it does not mask the
// count, so shifts above 31 result in zeros and need explicit handling.
void EmitX64::EmitLogicalShiftLeft(IR::Block& block, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    // TODO: Consider using BMI2 instructions like SHLX when arm-in-host flags is implemented.

    if (!carry_inst) {
        // ---- No carry-out requested ----
        if (!inst->GetArg(2).IsImmediate()) {
            // TODO: Remove redundant argument.
            inst->GetArg(2).GetInst()->DecrementRemainingUses();
        }

        auto amount = inst->GetArg(1);

        if (!amount.IsImmediate()) {
            Xbyak::Reg8 count = reg_alloc.UseGpr(amount, {HostLoc::RCX}).cvt8();
            Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            Xbyak::Reg32 zero = reg_alloc.ScratchGpr().cvt32();

            // Shift, then replace the result with zero when count >= 32.
            code->shl(value, count);
            code->xor_(zero, zero);
            code->cmp(count, 32);
            code->cmovnb(value, zero);
            return;
        }

        Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        u8 count = amount.GetU8();

        if (count > 31) {
            code->xor_(value, value);
        } else {
            code->shl(value, count);
        }
        return;
    }

    // ---- Carry-out requested ----
    EraseInstruction(block, carry_inst);
    inst->DecrementRemainingUses();

    auto amount = inst->GetArg(1);

    if (amount.IsImmediate()) {
        u8 count = amount.GetU8();
        Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();

        if (count == 0) {
            // There is nothing more to do.
            return;
        }

        if (count < 32) {
            code->bt(carry, 0);
            code->shl(value, count);
            code->setc(carry.cvt8());
        } else if (count == 32) {
            // Carry-out is bit 0 of the original value; the result is zero.
            code->mov(carry, value);
            code->xor_(value, value);
            code->and_(carry, 1);
        } else {
            code->xor_(value, value);
            code->xor_(carry, carry);
        }
        return;
    }

    Xbyak::Reg8 count = reg_alloc.UseGpr(amount, {HostLoc::RCX}).cvt8();
    Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
    Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();

    // TODO: Optimize this.

    code->inLocalLabel();

    code->cmp(count, 32);
    code->ja(".Rs_gt32");
    code->je(".Rs_eq32");
    // if (Rs & 0xFF < 32) {
    code->bt(carry, 0); // Set the carry flag for correct behaviour in the case when Rs & 0xFF == 0
    code->shl(value, count);
    code->setc(carry.cvt8());
    code->jmp(".end");
    // } else if (Rs & 0xFF > 32) {
    code->L(".Rs_gt32");
    code->xor_(value, value);
    code->xor_(carry, carry);
    code->jmp(".end");
    // } else if (Rs & 0xFF == 32) {
    code->L(".Rs_eq32");
    code->mov(carry, value);
    code->and_(carry, 1);
    code->xor_(value, value);
    // }
    code->L(".end");

    code->outLocalLabel();
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits x64 code for a 32-bit logical shift right of argument 0 by argument 1.
// Argument 2 is the carry-in. If a GetCarryFromOp pseudo-operation is attached to `inst`,
// it is erased from the block and its value is produced here alongside the shifted result.
void EmitX64::EmitLogicalShiftRight(IR::Block& block, IR::Inst* inst) {
    IR::Inst* carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    if (!carry_inst) {
        // No carry-out is requested, so the carry-in operand is not needed by this emission.
        // TODO: Remove redundant argument.
        if (!inst->GetArg(2).IsImmediate()) {
            inst->GetArg(2).GetInst()->DecrementRemainingUses();
        }

        IR::Value amount_arg = inst->GetArg(1);

        if (!amount_arg.IsImmediate()) {
            const Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
            const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            const Xbyak::Reg32 zeroed = reg_alloc.ScratchGpr().cvt32();

            // The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift.
            // ARM does not mask the count, so counts above 31 must produce zero: shift first,
            // then swap in zero whenever the count is not below 32.
            code->shr(value, amount);
            code->xor_(zeroed, zeroed);
            code->cmp(amount, 32);
            code->cmovnb(value, zeroed);
            return;
        }

        const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        const u8 amount = amount_arg.GetU8();

        if (amount > 31) {
            // Everything is shifted out.
            code->xor_(value, value);
        } else {
            code->shr(value, amount);
        }
        return;
    }

    // The carry-out is consumed: fold the pseudo-operation into this instruction.
    EraseInstruction(block, carry_inst);
    inst->DecrementRemainingUses();

    IR::Value amount_arg = inst->GetArg(1);

    if (amount_arg.IsImmediate()) {
        const u8 amount = amount_arg.GetU8();
        const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        const Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();

        if (amount > 32) {
            // Both the result and the carry-out are zero.
            code->xor_(value, value);
            code->xor_(carry, carry);
        } else if (amount == 32) {
            // The carry-out is bit 31 of the input; the result is zero.
            code->bt(value, 31);
            code->setc(carry.cvt8());
            code->mov(value, 0);
        } else if (amount != 0) {
            // SHR leaves the last bit shifted out in CF.
            code->shr(value, amount);
            code->setc(carry.cvt8());
        }
        // amount == 0: there is nothing more to do; value and carry pass through unchanged.
        return;
    }

    const Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
    const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
    const Xbyak::Reg32 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt32();

    // TODO: Optimize this.

    code->inLocalLabel();

    code->cmp(amount, 32);
    code->ja(".Rs_gt32");
    code->je(".Rs_eq32");

    // Rs & 0xFF == 0: skip straight to the end, leaving value and carry untouched.
    code->test(amount, amount);
    code->jz(".end");

    // Rs & 0xFF < 32: plain shift, carry-out taken from CF.
    code->shr(value, amount);
    code->setc(carry.cvt8());
    code->jmp(".end");

    // Rs & 0xFF > 32: result and carry-out are both zero.
    code->L(".Rs_gt32");
    code->xor_(value, value);
    code->xor_(carry, carry);
    code->jmp(".end");

    // Rs & 0xFF == 32: carry-out is bit 31 of the input, result is zero.
    code->L(".Rs_eq32");
    code->bt(value, 31);
    code->setc(carry.cvt8());
    code->xor_(value, value);

    code->L(".end");

    code->outLocalLabel();
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Emits a 64-bit logical shift right of argument 0 by an immediate amount (argument 1).
// Only immediate shift amounts below 64 are handled; anything else trips an assertion.
void EmitX64::EmitLogicalShiftRight64(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg64 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst);

    IR::Value amount_arg = inst->GetArg(1);
    ASSERT_MSG(amount_arg.IsImmediate(), "variable 64 bit shifts are not implemented");

    const u8 amount = amount_arg.GetU8();
    ASSERT_MSG(amount < 64, "shift width clamping is not implemented");

    code->shr(value, amount);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits x64 code for a 32-bit arithmetic shift right of argument 0 by argument 1.
// Argument 2 is the carry-in. If a GetCarryFromOp pseudo-operation is attached to `inst`,
// it is erased from the block and its value is produced here alongside the shifted result.
void EmitX64::EmitArithmeticShiftRight(IR::Block& block, IR::Inst* inst) {
    IR::Inst* carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    if (!carry_inst) {
        // No carry-out is requested, so the carry-in operand is not needed by this emission.
        // TODO: Remove redundant argument.
        if (!inst->GetArg(2).IsImmediate()) {
            inst->GetArg(2).GetInst()->DecrementRemainingUses();
        }

        IR::Value amount_arg = inst->GetArg(1);

        if (!amount_arg.IsImmediate()) {
            const Xbyak::Reg32 amount = reg_alloc.UseScratchGpr(amount_arg, {HostLoc::RCX}).cvt32();
            const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
            const Xbyak::Reg32 limit = reg_alloc.ScratchGpr().cvt32();

            // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift.
            // ARM does not mask the count.
            // Every count above 31 behaves exactly like 31, so the count is saturated to 31.
            code->mov(limit, 31);
            code->movzx(amount, amount.cvt8());
            code->cmp(amount, u32(31));
            code->cmovg(amount, limit);
            code->sar(value, amount.cvt8());
            return;
        }

        const u8 amount = amount_arg.GetU8();
        const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

        // Saturate the immediate count to 31.
        const u8 clamped = amount > 31 ? u8(31) : amount;
        code->sar(value, clamped);
        return;
    }

    // The carry-out is consumed: fold the pseudo-operation into this instruction.
    EraseInstruction(block, carry_inst);
    inst->DecrementRemainingUses();

    IR::Value amount_arg = inst->GetArg(1);

    if (amount_arg.IsImmediate()) {
        const u8 amount = amount_arg.GetU8();
        const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        const Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

        if (amount > 31) {
            // Fill the result with the sign bit, then report that bit as the carry-out.
            code->sar(value, 31);
            code->bt(value, 31);
            code->setc(carry);
        } else if (amount != 0) {
            // SAR leaves the last bit shifted out in CF.
            code->sar(value, amount);
            code->setc(carry);
        }
        // amount == 0: there is nothing more to do; value and carry pass through unchanged.
        return;
    }

    const Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
    const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
    const Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

    // TODO: Optimize this.

    code->inLocalLabel();

    code->cmp(amount, u32(31));
    code->ja(".Rs_gt31");

    // Rs & 0xFF == 0: skip straight to the end, leaving value and carry untouched.
    code->test(amount, amount);
    code->jz(".end");

    // Rs & 0xFF <= 31: plain shift, carry-out taken from CF.
    code->sar(value, amount);
    code->setc(carry);
    code->jmp(".end");

    // Rs & 0xFF > 31: 31 produces the same results as anything above 31.
    code->L(".Rs_gt31");
    code->sar(value, 31);
    code->bt(value, 31);
    code->setc(carry);

    code->L(".end");

    code->outLocalLabel();
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits x64 code for a 32-bit rotate right of argument 0 by argument 1.
// Argument 2 is the carry-in. If a GetCarryFromOp pseudo-operation is attached to `inst`,
// it is erased from the block and its value is produced here alongside the rotated result.
void EmitX64::EmitRotateRight(IR::Block& block, IR::Inst* inst) {
    IR::Inst* carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    if (!carry_inst) {
        // No carry-out is requested, so the carry-in operand is not needed by this emission.
        // TODO: Remove redundant argument.
        if (!inst->GetArg(2).IsImmediate()) {
            inst->GetArg(2).GetInst()->DecrementRemainingUses();
        }

        IR::Value amount_arg = inst->GetArg(1);

        if (!amount_arg.IsImmediate()) {
            const Xbyak::Reg8 amount = reg_alloc.UseGpr(amount_arg, {HostLoc::RCX}).cvt8();
            const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

            // x64 ROR instruction does (shift & 0x1F) for us.
            code->ror(value, amount);
            return;
        }

        const u8 amount = amount_arg.GetU8();
        const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

        const u8 masked = u8(amount & 0x1F);
        code->ror(value, masked);
        return;
    }

    // The carry-out is consumed: fold the pseudo-operation into this instruction.
    EraseInstruction(block, carry_inst);
    inst->DecrementRemainingUses();

    IR::Value amount_arg = inst->GetArg(1);

    if (amount_arg.IsImmediate()) {
        const u8 amount = amount_arg.GetU8();
        const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
        const Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

        // amount == 0: there is nothing more to do; value and carry pass through unchanged.
        if (amount != 0) {
            if ((amount & 0x1F) == 0) {
                // A non-zero multiple of 32 leaves the value alone; the carry-out is bit 31.
                code->bt(value, u8(31));
            } else {
                code->ror(value, amount);
            }
            code->setc(carry);
        }
        return;
    }

    const Xbyak::Reg8 amount = reg_alloc.UseScratchGpr(amount_arg, {HostLoc::RCX}).cvt8();
    const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
    const Xbyak::Reg8 carry = reg_alloc.UseDefGpr(inst->GetArg(2), carry_inst).cvt8();

    // TODO: Optimize

    code->inLocalLabel();

    // Rs & 0xFF == 0: skip straight to the end, leaving value and carry untouched.
    code->test(amount, amount);
    code->jz(".end");

    // Reduce the count to its low five bits; ZF tells us whether anything is left to rotate by.
    code->and_(amount.cvt32(), u32(0x1F));
    code->jz(".zero_1F");

    // Rs & 0x1F != 0: rotate, carry-out taken from CF.
    code->ror(value, amount);
    code->setc(carry);
    code->jmp(".end");

    // Rs & 0x1F == 0: value is unchanged, carry-out is bit 31.
    code->L(".zero_1F");
    code->bt(value, u8(31));
    code->setc(carry);

    code->L(".end");

    code->outLocalLabel();
}
|
|
|
|
|
2016-07-31 19:07:35 +01:00
|
|
|
// Emits a rotate-right-through-carry by one bit: argument 0 is the value, argument 1 the carry-in.
// If a GetCarryFromOp pseudo-operation is attached to `inst`, it is erased from the block and
// the carry-out is captured from CF after the rotate.
void EmitX64::EmitRotateRightExtended(IR::Block& block, IR::Inst* inst) {
    IR::Inst* carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    const Xbyak::Reg32 value = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();

    // The carry register doubles as the carry-out destination when one is requested.
    Xbyak::Reg64 carry_host;
    if (carry_inst) {
        carry_host = reg_alloc.UseDefGpr(inst->GetArg(1), carry_inst);
    } else {
        carry_host = reg_alloc.UseGpr(inst->GetArg(1));
    }
    const Xbyak::Reg8 carry = carry_host.cvt8();

    // Load the carry-in into CF, then rotate it into bit 31.
    code->bt(carry.cvt32(), 0);
    code->rcr(value, 1);

    if (!carry_inst) {
        return;
    }

    EraseInstruction(block, carry_inst);
    inst->DecrementRemainingUses();
    code->setc(carry);
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Placeholder register (constructed from -1) used where a carry or overflow output register
// is not needed; see DoCarry and the add/sub-with-carry emitters for its uses.
const Xbyak::Reg64 INVALID_REG = Xbyak::Reg64(-1);
|
|
|
|
|
|
|
|
// Picks the 8-bit register used for the carry of an add/sub-with-carry emission.
//   carry_in  - the carry-in value; no register is used for it when it is an immediate.
//   carry_out - the carry pseudo-operation to define, or null when no carry-out is wanted.
// Returns INVALID_REG (as an 8-bit register) when the carry-in is an immediate and no
// carry-out is wanted.
static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, const IR::Value& carry_in, IR::Inst* carry_out) {
    if (!carry_in.IsImmediate()) {
        if (carry_out) {
            return reg_alloc.UseDefGpr(carry_in, carry_out).cvt8();
        }
        return reg_alloc.UseGpr(carry_in).cvt8();
    }

    if (carry_out) {
        return reg_alloc.DefGpr(carry_out).cvt8();
    }
    return INVALID_REG.cvt8();
}
|
|
|
|
|
|
|
|
// Emits a 32-bit add of argument 0 and argument 1 plus the carry-in (argument 2).
// Attached GetCarryFromOp / GetOverflowFromOp pseudo-operations are erased from the block and
// their values are captured from the host CF / OF flags after the addition.
void EmitX64::EmitAddWithCarry(IR::Block& block, IR::Inst* inst) {
    IR::Inst* carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    IR::Inst* overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);
    IR::Value carry_in = inst->GetArg(2);

    const Xbyak::Reg32 sum = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    const Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
    const Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8();

    // TODO: Consider using LEA.

    if (rhs.IsImmediate()) {
        const u32 imm = rhs.GetU32();
        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: move bit 0 of the carry register into CF.
            code->bt(carry.cvt32(), 0);
            code->adc(sum, imm);
        } else if (carry_in.GetU1()) {
            // Constant carry-in of one.
            code->stc();
            code->adc(sum, imm);
        } else {
            // Constant carry-in of zero: a plain add suffices.
            code->add(sum, imm);
        }
    } else {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);
        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: move bit 0 of the carry register into CF.
            code->bt(carry.cvt32(), 0);
            code->adc(sum, *operand);
        } else if (carry_in.GetU1()) {
            // Constant carry-in of one.
            code->stc();
            code->adc(sum, *operand);
        } else {
            // Constant carry-in of zero: a plain add suffices.
            code->add(sum, *operand);
        }
    }

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        inst->DecrementRemainingUses();
        code->setc(carry);
    }

    if (overflow_inst) {
        EraseInstruction(block, overflow_inst);
        inst->DecrementRemainingUses();
        code->seto(overflow);
    }
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Emits a 64-bit add: the register defined for `inst` starts as argument 0 and has
// argument 1 added to it.
void EmitX64::EmitAdd64(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const Xbyak::Reg64 sum = reg_alloc.UseDefGpr(lhs, inst);
    const Xbyak::Reg64 addend = reg_alloc.UseGpr(rhs);

    code->add(sum, addend);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 32-bit subtract of argument 1 from argument 0 using the carry-in (argument 2).
// Attached GetCarryFromOp / GetOverflowFromOp pseudo-operations are erased from the block and
// their values are captured from the host flags after the subtraction.
void EmitX64::EmitSubWithCarry(IR::Block& block, IR::Inst* inst) {
    IR::Inst* carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);
    IR::Inst* overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);
    IR::Value carry_in = inst->GetArg(2);

    const Xbyak::Reg32 difference = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    const Xbyak::Reg8 carry = DoCarry(reg_alloc, carry_in, carry_inst);
    const Xbyak::Reg8 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt8() : INVALID_REG.cvt8();

    // TODO: Consider using LEA.
    // TODO: Optimize CMP case.
    // Note that x64 CF is inverse of what the ARM carry flag is here.

    if (rhs.IsImmediate()) {
        const u32 imm = rhs.GetU32();
        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: load it into CF, then invert it into a borrow.
            code->bt(carry.cvt32(), 0);
            code->cmc();
            code->sbb(difference, imm);
        } else if (carry_in.GetU1()) {
            // Constant carry-in of one means no borrow: a plain subtract suffices.
            code->sub(difference, imm);
        } else {
            // Constant carry-in of zero means a borrow.
            code->stc();
            code->sbb(difference, imm);
        }
    } else {
        OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
        operand.setBit(32);
        if (!carry_in.IsImmediate()) {
            // Runtime carry-in: load it into CF, then invert it into a borrow.
            code->bt(carry.cvt32(), 0);
            code->cmc();
            code->sbb(difference, *operand);
        } else if (carry_in.GetU1()) {
            // Constant carry-in of one means no borrow: a plain subtract suffices.
            code->sub(difference, *operand);
        } else {
            // Constant carry-in of zero means a borrow.
            code->stc();
            code->sbb(difference, *operand);
        }
    }

    if (carry_inst) {
        EraseInstruction(block, carry_inst);
        inst->DecrementRemainingUses();
        // The carry-out is the inverse of the host CF.
        code->setnc(carry);
    }

    if (overflow_inst) {
        EraseInstruction(block, overflow_inst);
        inst->DecrementRemainingUses();
        code->seto(overflow);
    }
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Emits a 64-bit subtract: the register defined for `inst` starts as argument 0 and has
// argument 1 subtracted from it.
void EmitX64::EmitSub64(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const Xbyak::Reg64 difference = reg_alloc.UseDefGpr(lhs, inst);
    const Xbyak::Reg64 subtrahend = reg_alloc.UseGpr(rhs);

    code->sub(difference, subtrahend);
}
|
|
|
|
|
2016-08-04 22:04:42 +01:00
|
|
|
// Emits a 32-bit multiply of argument 0 by argument 1 using IMUL.
void EmitX64::EmitMul(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // If the first operand is an immediate, swap so that it becomes the second one.
    if (lhs.IsImmediate()) {
        std::swap(lhs, rhs);
    }

    const Xbyak::Reg32 product = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (rhs.IsImmediate()) {
        // Three-operand form: product = product * imm.
        code->imul(product, product, rhs.GetU32());
        return;
    }

    OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
    operand.setBit(32);

    code->imul(product, *operand);
}
|
|
|
|
|
|
|
|
// Emits a 64-bit multiply of argument 0 by argument 1 using IMUL.
void EmitX64::EmitMul64(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const Xbyak::Reg64 product = reg_alloc.UseDefGpr(lhs, inst);
    OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);

    code->imul(product, *operand);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 32-bit bitwise AND of argument 0 with argument 1.
void EmitX64::EmitAnd(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const Xbyak::Reg32 dest = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (rhs.IsImmediate()) {
        // An immediate operand is encoded directly into the instruction.
        const u32 imm = rhs.GetU32();
        code->and_(dest, imm);
        return;
    }

    OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
    operand.setBit(32);

    code->and_(dest, *operand);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 32-bit bitwise exclusive-OR of argument 0 with argument 1.
void EmitX64::EmitEor(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const Xbyak::Reg32 dest = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (rhs.IsImmediate()) {
        // An immediate operand is encoded directly into the instruction.
        const u32 imm = rhs.GetU32();
        code->xor_(dest, imm);
        return;
    }

    OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
    operand.setBit(32);

    code->xor_(dest, *operand);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 32-bit bitwise OR of argument 0 with argument 1.
void EmitX64::EmitOr(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const Xbyak::Reg32 dest = reg_alloc.UseDefGpr(lhs, inst).cvt32();

    if (rhs.IsImmediate()) {
        // An immediate operand is encoded directly into the instruction.
        const u32 imm = rhs.GetU32();
        code->or_(dest, imm);
        return;
    }

    OpArg operand = reg_alloc.UseOpArg(rhs, any_gpr);
    operand.setBit(32);

    code->or_(dest, *operand);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 32-bit bitwise NOT of argument 0. An immediate argument is inverted at emission
// time and loaded with a single MOV.
void EmitX64::EmitNot(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);

    if (!operand.IsImmediate()) {
        const Xbyak::Reg32 dest = reg_alloc.UseDefGpr(operand, inst).cvt32();

        code->not_(dest);
        return;
    }

    const Xbyak::Reg32 dest = reg_alloc.DefGpr(inst).cvt32();

    code->mov(dest, u32(~operand.GetU32()));
}
|
2016-07-16 19:23:42 +01:00
|
|
|
|
2016-08-04 22:04:42 +01:00
|
|
|
// Emits a sign extension of the 32-bit argument 0 to 64 bits using MOVSXD.
void EmitX64::EmitSignExtendWordToLong(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg64 dest;
    OpArg src;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // For an immediate, the destination register is also used as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    // Read the source as a 32-bit operand.
    src.setBit(32);
    code->movsxd(dest, *src);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a sign extension of the 16-bit argument 0 to 32 bits using MOVSX.
void EmitX64::EmitSignExtendHalfToWord(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg64 dest;
    OpArg src;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // For an immediate, the destination register is also used as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    // Read the source as a 16-bit operand.
    src.setBit(16);
    code->movsx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a sign extension of the 8-bit argument 0 to 32 bits using MOVSX.
void EmitX64::EmitSignExtendByteToWord(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg64 dest;
    OpArg src;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // For an immediate, the destination register is also used as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    // Read the source as an 8-bit operand.
    src.setBit(8);
    code->movsx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-08-04 22:04:42 +01:00
|
|
|
// Emits a zero extension of the 32-bit argument 0 to 64 bits.
void EmitX64::EmitZeroExtendWordToLong(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg64 dest;
    OpArg src;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // For an immediate, the destination register is also used as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    // Read the source as a 32-bit operand.
    src.setBit(32);
    // x64 zeros upper 32 bits on a 32-bit move
    code->mov(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a zero extension of the 16-bit argument 0 to 32 bits using MOVZX.
void EmitX64::EmitZeroExtendHalfToWord(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg64 dest;
    OpArg src;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // For an immediate, the destination register is also used as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    // Read the source as a 16-bit operand.
    src.setBit(16);
    code->movzx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a zero extension of the 8-bit argument 0 to 32 bits using MOVZX.
void EmitX64::EmitZeroExtendByteToWord(IR::Block&, IR::Inst* inst) {
    Xbyak::Reg64 dest;
    OpArg src;

    if (!inst->GetArg(0).IsImmediate()) {
        std::tie(src, dest) = reg_alloc.UseDefOpArgGpr(inst->GetArg(0), inst);
    } else {
        // TODO: Optimize
        // For an immediate, the destination register is also used as the source.
        dest = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
        src = dest;
    }

    // Read the source as an 8-bit operand.
    src.setBit(8);
    code->movzx(dest.cvt32(), *src);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a byte-order reversal of the 32-bit argument 0 using BSWAP.
void EmitX64::EmitByteReverseWord(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 word = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt32();
    code->bswap(word);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a byte swap of the 16-bit argument 0: rotating a 16-bit register by 8 exchanges
// its two bytes.
void EmitX64::EmitByteReverseHalf(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg16 half = reg_alloc.UseDefGpr(inst->GetArg(0), inst).cvt16();
    code->rol(half, 8);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a byte-order reversal of the 64-bit argument 0 using BSWAP.
void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg64 dual = reg_alloc.UseDefGpr(inst->GetArg(0), inst);
    code->bswap(dual);
}
|
2016-07-16 19:23:42 +01:00
|
|
|
|
2016-12-15 22:33:20 +00:00
|
|
|
// Emits a count of the leading zero bits of the 32-bit argument 0.
// Uses LZCNT when the host CPU reports it, otherwise falls back to a BSR-based sequence.
void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);

    if (!cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
        const Xbyak::Reg32 input = reg_alloc.UseScratchGpr(operand).cvt32();
        const Xbyak::Reg32 count = reg_alloc.DefGpr(inst).cvt32();

        // The result of a bsr of zero is undefined, but zf is set after it.
        code->bsr(count, input);
        // MOV leaves the flags alone, so ZF from BSR is still valid for the CMOVZ below.
        code->mov(input, 0xFFFFFFFF);
        code->cmovz(count, input);
        // count = 31 - count, which turns a bit index into a leading-zero count.
        code->neg(count);
        code->add(count, 31);
        return;
    }

    const Xbyak::Reg32 input = reg_alloc.UseGpr(operand).cvt32();
    const Xbyak::Reg32 count = reg_alloc.DefGpr(inst).cvt32();

    code->lzcnt(count, input);
}
|
|
|
|
|
2016-12-18 16:25:41 +00:00
|
|
|
// Emits code that two's-complement negates the low 16 bits of the 32-bit argument 0 while
// leaving the high 16 bits unchanged.
void EmitX64::EmitNegateLowWord(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();

    // Swap the halves so the low halfword sits in bits 16..31.
    code->ror(result, 16);
    // Negate the top half as ~x + 1. The carry out of bit 31 is discarded, so the other
    // half is not disturbed.
    // Use `xor_` rather than `xor`: `xor` is an alternative token for `^` in standard C++
    // and does not compile as a member name on conforming compilers.
    code->xor_(result, 0xFFFF0000);
    code->add(result, 0x00010000);
    // Swap the halves back.
    code->ror(result, 16);
}
|
|
|
|
|
|
|
|
// Emits code that two's-complement negates the high 16 bits of the 32-bit argument 0 while
// leaving the low 16 bits unchanged.
void EmitX64::EmitNegateHighWord(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();

    // Negate the top half as ~x + 1. The carry out of bit 31 is discarded and the low half
    // is not touched by either instruction.
    // Use `xor_` rather than `xor`: `xor` is an alternative token for `^` in standard C++
    // and does not compile as a member name on conforming compilers.
    code->xor_(result, 0xFFFF0000);
    code->add(result, 0x00010000);
}
|
|
|
|
|
2016-12-15 22:33:20 +00:00
|
|
|
// Emits a 32-bit signed saturating add of argument 0 and argument 1.
// If a GetOverflowFromOp pseudo-operation is attached to `inst`, it is erased from the block
// and its value is captured from the host OF flag after the addition.
void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) {
    IR::Inst* overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const Xbyak::Reg32 sum = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    const Xbyak::Reg32 addend = reg_alloc.UseGpr(rhs).cvt32();
    // This register first holds the saturation value and later, if requested, the overflow flag.
    const Xbyak::Reg32 saturated = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32() : reg_alloc.ScratchGpr().cvt32();

    // Build the saturation value from the sign bit of the first operand:
    // 0x7FFFFFFF if it was positive, or 0x80000000 if it was negative.
    code->mov(saturated, sum);
    code->shr(saturated, 31);
    code->add(saturated, 0x7FFFFFFF);

    // Add, and substitute the saturation value if the addition overflowed.
    code->add(sum, addend);
    code->cmovo(sum, saturated);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);
    inst->DecrementRemainingUses();

    // CMOVO does not modify flags, so OF still reflects the ADD above.
    code->seto(saturated.cvt8());
}
|
|
|
|
|
|
|
|
/**
 * Emits a 32-bit signed saturating subtraction (argument 0 minus argument 1).
 *
 * If a GetOverflowFromOp pseudo-operation is attached to this instruction,
 * it is consumed here and its register receives the host overflow flag.
 */
void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 difference = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 rhs_reg = reg_alloc.UseGpr(rhs).cvt32();
    Xbyak::Reg32 saturated = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32()
                                           : reg_alloc.ScratchGpr().cvt32();

    // Precompute the value to saturate to, based on the sign bit of the left operand:
    //   sign == 0  ->  0 + 0x7FFFFFFF == 0x7FFFFFFF
    //   sign == 1  ->  1 + 0x7FFFFFFF == 0x80000000
    code->mov(saturated, difference);
    code->shr(saturated, 31);
    code->add(saturated, 0x7FFFFFFF);

    // Perform the subtraction; on signed overflow replace the wrapped result with the saturated value.
    code->sub(difference, rhs_reg);
    code->cmovo(difference, saturated);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);
    inst->DecrementRemainingUses();

    // CMOVcc leaves the flags untouched, so OF still reflects the SUB above.
    code->seto(saturated.cvt8());
}
|
|
|
|
|
2016-12-21 14:16:48 +00:00
|
|
|
/**
 * Emits code that clamps the signed 32-bit value in argument 0 to the
 * unsigned range [0, 2^N - 1], where N (0..31) is the immediate in argument 1.
 *
 * If a GetOverflowFromOp pseudo-operation is attached, it is consumed here and
 * its register receives 1 when the input lay outside the range, 0 otherwise.
 */
void EmitX64::EmitUnsignedSaturation(IR::Block& block, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value a = inst->GetArg(0);
    size_t N = inst->GetArg(1).GetU8();
    ASSERT(N <= 31);

    const u32 upper_bound = (1u << N) - 1;

    Xbyak::Reg32 clamped = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg32 input = reg_alloc.UseGpr(a).cvt32();
    Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32()
                                          : reg_alloc.ScratchGpr().cvt32();

    // Pseudocode: clamped = clamp(input, 0, upper_bound);
    code->xor_(overflow, overflow);      // overflow doubles as a zero constant
    code->cmp(input, upper_bound);
    code->mov(clamped, upper_bound);     // assume input > upper_bound (MOV preserves flags)
    code->cmovle(clamped, overflow);     // signed input <= upper_bound: covers negative inputs -> 0
    code->cmovbe(clamped, input);        // unsigned input <= upper_bound: already in range

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);
    inst->DecrementRemainingUses();

    // Unsigned "above" is true exactly when the input was negative or too large.
    code->seta(overflow.cvt8());
}
|
|
|
|
|
|
|
|
/**
 * Emits code that clamps the signed 32-bit value in argument 0 to the signed
 * N-bit range [-2^(N-1), 2^(N-1) - 1], where N (1..32) is the immediate in
 * argument 1.
 *
 * If a GetOverflowFromOp pseudo-operation is attached, it yields whether the
 * input lay outside that range. For N == 32 no code is emitted: the result
 * aliases the input and the overflow uses are replaced by constant false.
 */
void EmitX64::EmitSignedSaturation(IR::Block& block, IR::Inst* inst) {
    auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);

    IR::Value a = inst->GetArg(0);
    size_t N = inst->GetArg(1).GetU8();
    ASSERT(N >= 1 && N <= 32);

    if (N == 32) {
        // Every 32-bit value is already in range.
        reg_alloc.RegisterAddDef(inst, a);
        if (overflow_inst) {
            auto no_overflow = IR::Value(false);
            overflow_inst->ReplaceUsesWith(no_overflow);
        }
        return;
    }

    u32 mask = (1u << N) - 1;
    u32 positive_saturated_value = (1u << (N - 1)) - 1;
    u32 negative_saturated_value = 1u << (N - 1);
    u32 sext_negative_saturated_value = Common::SignExtend(N, negative_saturated_value);

    Xbyak::Reg32 result = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg32 reg_a = reg_alloc.UseGpr(a).cvt32();
    Xbyak::Reg32 overflow = overflow_inst ? reg_alloc.DefGpr(overflow_inst).cvt32()
                                          : reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();

    // Bias the input by 2^(N-1). Afterwards overflow holds a value between 0 and mask
    // (as an unsigned number) iff the input was within
    // [sext_negative_saturated_value, positive_saturated_value].
    code->lea(overflow, code->ptr[reg_a.cvt64() + negative_saturated_value]);

    // Put the appropriate saturated value in result
    code->cmp(reg_a, positive_saturated_value);
    code->mov(tmp, positive_saturated_value);
    code->mov(result, sext_negative_saturated_value);
    code->cmovg(result, tmp);

    // Do the saturation: keep the input itself when it was in range.
    code->cmp(overflow, mask);
    code->cmovbe(result, reg_a);

    if (!overflow_inst) {
        return;
    }

    EraseInstruction(block, overflow_inst);
    inst->DecrementRemainingUses();

    // The flags still come from `cmp overflow, mask`: "above" means out of range.
    code->seta(overflow.cvt8());
}
|
|
|
|
|
2016-12-20 22:05:51 +00:00
|
|
|
/**
|
|
|
|
* Extracts the most significant bits from each of the packed bytes, and packs them together.
|
|
|
|
*
|
|
|
|
* value before: a-------b-------c-------d-------
|
|
|
|
* value after: 0000000000000000000000000000abcd
|
|
|
|
*
|
|
|
|
* @param value The register containing the value to operate on. Result will be stored in the same register.
|
|
|
|
* @param a_tmp A register which can be used as a scratch register.
|
|
|
|
*/
|
2016-12-18 16:25:41 +00:00
|
|
|
static void ExtractMostSignificantBitFromPackedBytes(const Xbyak::util::Cpu& cpu_info, BlockOfCode* code, RegAlloc& reg_alloc, Xbyak::Reg32 value, boost::optional<Xbyak::Reg32> a_tmp = boost::none) {
|
|
|
|
if (cpu_info.has(Xbyak::util::Cpu::tBMI2)) {
|
|
|
|
Xbyak::Reg32 tmp = a_tmp ? *a_tmp : reg_alloc.ScratchGpr().cvt32();
|
|
|
|
code->mov(tmp, 0x80808080);
|
|
|
|
code->pext(value, value, tmp);
|
|
|
|
} else {
|
|
|
|
code->and_(value, 0x80808080);
|
|
|
|
code->imul(value, value, 0x00204081);
|
|
|
|
code->shr(value, 28);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-20 22:06:14 +00:00
|
|
|
/**
|
|
|
|
* Extracts the most significant bits from each of the packed words, duplicates them, and packs them together.
|
|
|
|
*
|
|
|
|
* value before: a---------------b---------------
|
|
|
|
* value after: 0000000000000000000000000000aabb
|
|
|
|
*
|
|
|
|
* @param value The register containing the value to operate on. Result will be stored in the same register.
|
|
|
|
*/
|
2016-12-18 16:25:41 +00:00
|
|
|
static void ExtractAndDuplicateMostSignificantBitFromPackedWords(BlockOfCode* code, Xbyak::Reg32 value) {
|
|
|
|
code->and_(value, 0x80008000);
|
|
|
|
code->shr(value, 1);
|
|
|
|
code->imul(value, value, 0xC003);
|
|
|
|
code->shr(value, 28);
|
|
|
|
}
|
|
|
|
|
2016-12-04 20:52:33 +00:00
|
|
|
/**
 * Emits a lane-wise addition of four packed unsigned bytes using SWAR arithmetic
 * in general-purpose registers.
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value holding the carry-out of each byte lane.
 */
void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 lhs = reg_alloc.UseScratchGpr(a).cvt32();
    Xbyak::Reg32 rhs = reg_alloc.UseScratchGpr(b).cvt32();
    Xbyak::Reg32 sum = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg32 carries, scratch;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        carries = reg_alloc.DefGpr(ge_inst).cvt32();
        scratch = reg_alloc.ScratchGpr().cvt32();

        // First carry term: both top bits of a lane are set.
        code->mov(carries, lhs);
        code->and_(carries, rhs);
    }

    // SWAR Arithmetic:
    // Add the low 7 bits of each lane separately so that carries cannot cross lanes,
    // then patch in the top bit of each lane with XOR.
    code->mov(sum, lhs);
    code->xor_(sum, rhs);
    code->and_(sum, 0x80808080);   // sum := top bits of (a ^ b)
    code->and_(lhs, 0x7F7F7F7F);
    code->and_(rhs, 0x7F7F7F7F);
    code->add(lhs, rhs);           // lhs := 7-bit partial sums; bit 7 of a lane is the carry into it
    if (ge_inst) {
        // Second carry term: exactly one top bit set and a carry came in from below.
        code->mov(scratch, sum);
        code->and_(scratch, lhs);
        code->or_(carries, scratch);
    }
    code->xor_(sum, lhs);
    if (ge_inst) {
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, carries, scratch);
    }
}
|
|
|
|
|
|
|
|
/**
 * Emits a lane-wise addition of four packed signed bytes (PADDB).
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value with one bit per lane, set when the saturated sum of that lane is
 * non-negative.
 */
void EmitX64::EmitPackedAddS8(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 lhs = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 rhs = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 ge_bits;

    Xbyak::Xmm xmm_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_rhs = reg_alloc.ScratchXmm();

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        ge_bits = reg_alloc.DefGpr(ge_inst).cvt32();
    }

    code->movd(xmm_lhs, lhs);
    code->movd(xmm_rhs, rhs);
    if (ge_inst) {
        // The saturated sum cannot wrap, so its sign bit is the sign of the true sum.
        Xbyak::Xmm xmm_saturated = reg_alloc.ScratchXmm();
        code->movdqa(xmm_saturated, xmm_lhs);
        code->paddsb(xmm_saturated, xmm_rhs);
        code->movd(ge_bits, xmm_saturated);
    }
    code->paddb(xmm_lhs, xmm_rhs);
    code->movd(lhs, xmm_lhs);
    if (ge_inst) {
        // Invert so that a set top bit means "sum >= 0", then gather the top bits.
        code->not_(ge_bits);
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, ge_bits);
    }
}
|
|
|
|
|
|
|
|
/**
 * Emits a lane-wise addition of two packed unsigned halfwords using SWAR arithmetic
 * in general-purpose registers.
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value holding the carry-out of each halfword lane, each duplicated into two bits.
 */
void EmitX64::EmitPackedAddU16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 lhs = reg_alloc.UseScratchGpr(a).cvt32();
    Xbyak::Reg32 rhs = reg_alloc.UseScratchGpr(b).cvt32();
    Xbyak::Reg32 sum = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg32 carries, scratch;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        carries = reg_alloc.DefGpr(ge_inst).cvt32();

        // First carry term: both top bits of a lane are set.
        code->mov(carries, lhs);
        code->and_(carries, rhs);
    }

    // SWAR Arithmetic:
    // Add the low 15 bits of each lane separately so that carries cannot cross lanes,
    // then patch in the top bit of each lane with XOR.
    code->mov(sum, lhs);
    code->xor_(sum, rhs);
    code->and_(sum, 0x80008000);   // sum := top bits of (a ^ b)
    code->and_(lhs, 0x7FFF7FFF);
    code->and_(rhs, 0x7FFF7FFF);
    code->add(lhs, rhs);           // lhs := 15-bit partial sums; bit 15 of a lane is the carry into it
    if (ge_inst) {
        // Second carry term: exactly one top bit set and a carry came in from below.
        scratch = reg_alloc.ScratchGpr().cvt32();
        code->mov(scratch, sum);
        code->and_(scratch, lhs);
        code->or_(carries, scratch);
    }
    code->xor_(sum, lhs);
    if (ge_inst) {
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, carries);
    }
}
|
|
|
|
|
|
|
|
/**
 * Emits a lane-wise addition of two packed signed halfwords (PADDW).
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value with two (duplicated) bits per lane, set when the saturated sum of that
 * lane is non-negative.
 */
void EmitX64::EmitPackedAddS16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 lhs = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 rhs = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 ge_bits;

    Xbyak::Xmm xmm_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_rhs = reg_alloc.ScratchXmm();

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        ge_bits = reg_alloc.DefGpr(ge_inst).cvt32();
    }

    code->movd(xmm_lhs, lhs);
    code->movd(xmm_rhs, rhs);
    if (ge_inst) {
        // The saturated sum cannot wrap, so its sign bit is the sign of the true sum.
        Xbyak::Xmm xmm_saturated = reg_alloc.ScratchXmm();
        code->movdqa(xmm_saturated, xmm_lhs);
        code->paddsw(xmm_saturated, xmm_rhs);
        code->movd(ge_bits, xmm_saturated);
    }
    code->paddw(xmm_lhs, xmm_rhs);
    code->movd(lhs, xmm_lhs);
    if (ge_inst) {
        // Invert so that a set top bit means "sum >= 0", then gather the top bits.
        code->not_(ge_bits);
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, ge_bits);
    }
}
|
|
|
|
|
2016-12-05 00:27:59 +00:00
|
|
|
/**
 * Emits a lane-wise subtraction of four packed unsigned bytes (PSUBB).
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value with one bit per lane, set when a >= b (unsigned) in that lane.
 */
void EmitX64::EmitPackedSubU8(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 lhs = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 rhs = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 ge_bits;

    Xbyak::Xmm xmm_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_rhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_ge;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        ge_bits = reg_alloc.DefGpr(ge_inst).cvt32();
        xmm_ge = reg_alloc.ScratchXmm();
    }

    code->movd(xmm_lhs, lhs);
    code->movd(xmm_rhs, rhs);
    if (ge_inst) {
        // a >= b  <=>  max(a, b) == a; PCMPEQB yields 0xFF in every lane where that holds.
        code->movdqa(xmm_ge, xmm_lhs);
        code->pmaxub(xmm_ge, xmm_rhs);
        code->pcmpeqb(xmm_ge, xmm_lhs);
        code->movd(ge_bits, xmm_ge);
    }
    code->psubb(xmm_lhs, xmm_rhs);
    code->movd(lhs, xmm_lhs);

    if (ge_inst) {
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, ge_bits);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Emits a lane-wise subtraction of four packed signed bytes (PSUBB).
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value with one bit per lane, set when the saturated difference of that lane
 * is non-negative.
 */
void EmitX64::EmitPackedSubS8(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 lhs = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 rhs = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 ge_bits;

    Xbyak::Xmm xmm_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_rhs = reg_alloc.ScratchXmm();

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();
        ge_bits = reg_alloc.DefGpr(ge_inst).cvt32();
    }
    code->movd(xmm_rhs, rhs);
    code->movd(xmm_lhs, lhs);
    if (ge_inst) {
        // The saturated difference cannot wrap, so its sign bit is the sign of the true difference.
        Xbyak::Xmm xmm_saturated = reg_alloc.ScratchXmm();
        code->movdqa(xmm_saturated, xmm_lhs);
        code->psubsb(xmm_saturated, xmm_rhs);
        code->movd(ge_bits, xmm_saturated);
    }
    code->psubb(xmm_lhs, xmm_rhs);
    code->movd(lhs, xmm_lhs);
    if (ge_inst) {
        // Invert so that a set top bit means "difference >= 0", then gather the top bits.
        code->not_(ge_bits);
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, ge_bits);
    }
}
|
|
|
|
|
|
|
|
/**
 * Emits a lane-wise subtraction of two packed unsigned halfwords (PSUBW).
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value with two (duplicated) bits per lane, set when a >= b (unsigned) in that lane.
 *
 * The a >= b test uses PMAXUW when the host supports SSE4.1, and an SSE2-only
 * sequence otherwise (PMAXUW, unlike PMAXUB, is not part of SSE2).
 */
void EmitX64::EmitPackedSubU16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 reg_ge;

    Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_ge;

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        reg_ge = reg_alloc.DefGpr(ge_inst).cvt32();
        xmm_ge = reg_alloc.ScratchXmm();
    }

    code->movd(xmm_a, reg_a);
    code->movd(xmm_b, reg_b);
    if (ge_inst) {
        if (cpu_info.has(Xbyak::util::Cpu::tSSE41)) {
            // a >= b  <=>  max(a, b) == a
            code->movdqa(xmm_ge, xmm_a);
            code->pmaxuw(xmm_ge, xmm_b);
            code->pcmpeqw(xmm_ge, xmm_a);
        } else {
            // SSE2 fallback: a >= b  <=>  saturating (b - a) == 0
            Xbyak::Xmm xmm_zero = reg_alloc.ScratchXmm();
            code->movdqa(xmm_ge, xmm_b);
            code->psubusw(xmm_ge, xmm_a);
            code->pxor(xmm_zero, xmm_zero);
            code->pcmpeqw(xmm_ge, xmm_zero);
        }
        // Either way xmm_ge now holds 0xFFFF in every lane where a >= b.
        code->movd(reg_ge, xmm_ge);
    }
    code->psubw(xmm_a, xmm_b);
    code->movd(reg_a, xmm_a);
    if (ge_inst) {
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge);
    }
}
|
|
|
|
|
|
|
|
/**
 * Emits a lane-wise subtraction of two packed signed halfwords (PSUBW).
 *
 * If a GetGEFromOp pseudo-operation is attached, it is consumed here and receives a
 * 4-bit value with two (duplicated) bits per lane, set when the saturated difference
 * of that lane is non-negative.
 */
void EmitX64::EmitPackedSubS16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 lhs = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 rhs = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 ge_bits;

    Xbyak::Xmm xmm_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_rhs = reg_alloc.ScratchXmm();

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        ge_bits = reg_alloc.DefGpr(ge_inst).cvt32();
    }

    code->movd(xmm_rhs, rhs);
    code->movd(xmm_lhs, lhs);
    if (ge_inst) {
        // The saturated difference cannot wrap, so its sign bit is the sign of the true difference.
        Xbyak::Xmm xmm_saturated = reg_alloc.ScratchXmm();
        code->movdqa(xmm_saturated, xmm_lhs);
        code->psubsw(xmm_saturated, xmm_rhs);
        code->movd(ge_bits, xmm_saturated);
    }
    code->psubw(xmm_lhs, xmm_rhs);
    code->movd(lhs, xmm_lhs);
    if (ge_inst) {
        // Invert so that a set top bit means "difference >= 0", then gather the top bits.
        code->not_(ge_bits);
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, ge_bits);
    }
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/**
 * Emits a lane-wise unsigned halving addition of four packed bytes:
 * each result byte is (a + b) >> 1, computed without losing the carry.
 *
 * Uses an SSSE3 (PSHUFB) widening implementation when available, and a SWAR
 * implementation in general-purpose registers otherwise.
 */
void EmitX64::EmitPackedHalvingAddU8(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    // This code path requires SSSE3 because of the PSHUFB instruction.
    // A fallback implementation is provided below.
    if (cpu_info.has(Xbyak::util::Cpu::tSSSE3)) {
        Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();
        Xbyak::Reg32 arg = reg_alloc.UseGpr(b).cvt32();

        // Load the operands into Xmm registers
        Xbyak::Xmm xmm_scratch_a = reg_alloc.ScratchXmm();
        Xbyak::Xmm xmm_scratch_b = reg_alloc.ScratchXmm();

        Xbyak::Xmm xmm_mask = reg_alloc.ScratchXmm();
        Xbyak::Reg64 mask = reg_alloc.ScratchGpr();

        code->movd(xmm_scratch_a, result);
        code->movd(xmm_scratch_b, arg);

        // Set the mask to expand the values
        // 0xAABBCCDD becomes 0x00AA00BB00CC00DD
        // (a shuffle index with its top bit set produces a zero byte)
        code->mov(mask, 0x8003800280018000);
        code->movq(xmm_mask, mask);

        // Expand each 8-bit value to 16-bit
        code->pshufb(xmm_scratch_a, xmm_mask);
        code->pshufb(xmm_scratch_b, xmm_mask);

        // Add the individual 16-bit values
        code->paddw(xmm_scratch_a, xmm_scratch_b);

        // Shift the 16-bit values to the right to halve them
        code->psrlw(xmm_scratch_a, 1);

        // Set the mask to pack the values again
        // 0x00AA00BB00CC00DD becomes 0xAABBCCDD
        code->mov(mask, 0x06040200);
        code->movq(xmm_mask, mask);

        // Shuffle them back to 8-bit values
        code->pshufb(xmm_scratch_a, xmm_mask);

        code->movd(result, xmm_scratch_a);
        return;
    }

    // Fallback implementation in case the CPU doesn't support SSSE3
    Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 and_a_b = reg_a;
    Xbyak::Reg32 result = reg_a;

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.

    code->mov(xor_a_b, reg_a);
    code->and_(and_a_b, reg_b);
    code->xor_(xor_a_b, reg_b);
    code->shr(xor_a_b, 1);
    code->and_(xor_a_b, 0x7F7F7F7F);
    code->add(result, xor_a_b);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/**
 * Emits a lane-wise unsigned halving addition of two packed halfwords:
 * each result halfword is (a + b) >> 1, computed without losing the carry.
 */
void EmitX64::EmitPackedHalvingAddU16(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 and_a_b = reg_a;
    Xbyak::Reg32 result = reg_a;

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.

    code->mov(xor_a_b, reg_a);
    code->and_(and_a_b, reg_b);
    code->xor_(xor_a_b, reg_b);
    code->shr(xor_a_b, 1);
    code->and_(xor_a_b, 0x7FFF7FFF);
    code->add(result, xor_a_b);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/**
 * Emits a lane-wise signed halving addition of four packed bytes:
 * each result byte is the arithmetic (a + b) >> 1, computed without overflow.
 */
void EmitX64::EmitPackedHalvingAddS8(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 and_a_b = reg_a;
    Xbyak::Reg32 result = reg_a;
    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
    // carry propagates the sign bit from (x^y)>>1 upwards by one.

    code->mov(xor_a_b, reg_a);
    code->and_(and_a_b, reg_b);
    code->xor_(xor_a_b, reg_b);
    code->mov(carry, xor_a_b);
    code->and_(carry, 0x80808080);
    code->shr(xor_a_b, 1);
    code->and_(xor_a_b, 0x7F7F7F7F);
    code->add(result, xor_a_b);
    // XOR rather than ADD so that the sign contribution cannot carry into the next lane.
    code->xor_(result, carry);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/**
 * Emits a lane-wise signed halving addition of two packed halfwords:
 * each result halfword is the arithmetic (a + b) >> 1, computed without overflow.
 */
void EmitX64::EmitPackedHalvingAddS16(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 xor_a_b = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 and_a_b = reg_a;
    Xbyak::Reg32 result = reg_a;
    Xbyak::Reg32 carry = reg_alloc.ScratchGpr().cvt32();

    // This relies on the equality x+y == ((x&y) << 1) + (x^y).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
    // We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.
    // carry propagates the sign bit from (x^y)>>1 upwards by one.

    code->mov(xor_a_b, reg_a);
    code->and_(and_a_b, reg_b);
    code->xor_(xor_a_b, reg_b);
    code->mov(carry, xor_a_b);
    code->and_(carry, 0x80008000);
    code->shr(xor_a_b, 1);
    code->and_(xor_a_b, 0x7FFF7FFF);
    code->add(result, xor_a_b);
    // XOR rather than ADD so that the sign contribution cannot carry into the next lane.
    code->xor_(result, carry);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/**
 * Emits a lane-wise unsigned halving subtraction of four packed bytes:
 * each result byte is the low 8 bits of (a - b) >> 1.
 */
void EmitX64::EmitPackedHalvingSubU8(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    code->xor_(minuend, subtrahend);
    code->and_(subtrahend, minuend);
    code->shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 7 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // (Setting it also overwrites the bit that the 32-bit shift leaked in from the lane above.)
    // We invert this bit at the end as this tells us if that bit was borrowed from.
    code->or_(minuend, 0x80808080);
    code->sub(minuend, subtrahend);
    code->xor_(minuend, 0x80808080);

    // minuend now contains the desired result.
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/**
 * Emits a lane-wise unsigned halving subtraction of two packed halfwords:
 * each result halfword is the low 16 bits of (a - b) >> 1.
 */
void EmitX64::EmitPackedHalvingSubU16(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();

    // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
    // Note that x^y always contains the LSB of the result.
    // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).

    code->xor_(minuend, subtrahend);
    code->and_(subtrahend, minuend);
    code->shr(minuend, 1);

    // At this point,
    // minuend := (a^b) >> 1
    // subtrahend := (a^b) & b

    // We must now perform a partitioned subtraction.
    // We can do this because minuend contains 15 bit fields.
    // We use the extra bit in minuend as a bit to borrow from; we set this bit.
    // (Setting it also overwrites the bit that the 32-bit shift leaked in from the lane above.)
    // We invert this bit at the end as this tells us if that bit was borrowed from.
    code->or_(minuend, 0x80008000);
    code->sub(minuend, subtrahend);
    code->xor_(minuend, 0x80008000);

    // minuend now contains the desired result.
}
|
|
|
|
|
2016-12-04 20:52:33 +00:00
|
|
|
/**
 * Emits a two-operand packed SIMD operation on 32-bit values held in general-purpose
 * registers: both arguments are moved into XMM registers, the given instruction is
 * applied, and the low 32 bits of the outcome are moved back.
 *
 * @param code      The code block to emit into.
 * @param reg_alloc Register allocator for the current block.
 * @param inst      The IR instruction; arguments 0 and 1 are the operands.
 * @param fn        The Xbyak emitter for the packed instruction, called as fn(dest/lhs, rhs).
 */
static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 gpr_lhs = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 gpr_rhs = reg_alloc.UseGpr(rhs).cvt32();

    Xbyak::Xmm xmm_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_rhs = reg_alloc.ScratchXmm();

    // GPR -> XMM (MOVD zeroes the remaining bits of the XMM register).
    code->movd(xmm_lhs, gpr_lhs);
    code->movd(xmm_rhs, gpr_rhs);

    (code->*fn)(xmm_lhs, xmm_rhs);

    // XMM -> GPR: gpr_lhs is also the register defined for inst.
    code->movd(gpr_lhs, xmm_lhs);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed unsigned saturating addition of bytes, implemented with PADDUSB.
void EmitX64::EmitPackedSaturatedAddU8(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::paddusb;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed signed saturating addition of bytes, implemented with PADDSB.
void EmitX64::EmitPackedSaturatedAddS8(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::paddsb;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed unsigned saturating subtraction of bytes, implemented with PSUBUSB.
void EmitX64::EmitPackedSaturatedSubU8(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::psubusb;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed signed saturating subtraction of bytes, implemented with PSUBSB.
void EmitX64::EmitPackedSaturatedSubS8(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::psubsb;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed unsigned saturating addition of halfwords, implemented with PADDUSW.
void EmitX64::EmitPackedSaturatedAddU16(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::paddusw;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed signed saturating addition of halfwords, implemented with PADDSW.
void EmitX64::EmitPackedSaturatedAddS16(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::paddsw;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed unsigned saturating subtraction of halfwords, implemented with PSUBUSW.
void EmitX64::EmitPackedSaturatedSubU16(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::psubusw;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
/// Packed signed saturating subtraction of halfwords, implemented with PSUBSW.
void EmitX64::EmitPackedSaturatedSubS16(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::psubsw;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-12-17 19:52:22 +00:00
|
|
|
/// Sum of absolute differences of the packed bytes, implemented with PSADBW.
/// NOTE(review): PSADBW treats the bytes as unsigned even though this opcode is
/// named "S8" - confirm the naming against the frontend that generates it.
void EmitX64::EmitPackedAbsDiffSumS8(IR::Block&, IR::Inst* inst) {
    using PackedOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Mmx&, const Xbyak::Operand&);
    const PackedOp op = &Xbyak::CodeGenerator::psadbw;
    EmitPackedOperation(code, reg_alloc, inst, op);
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
/**
 * Emits code that replaces a single-precision denormal held in xmm_value with zero
 * and, when it does so, writes (1 << 7) to JitState::FPSCR_IDC.
 *
 * @param code        The code block to emit into.
 * @param xmm_value   Register holding the value to check; zeroed in place if denormal.
 * @param gpr_scratch Scratch register; clobbered.
 */
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    using namespace Xbyak::util;
    Xbyak::Label not_denormal;

    // We need to report back whether we've found a denormal on input.
    // SSE doesn't do this for us when SSE's DAZ is enabled.

    // Strip the sign bit. Denormals then lie in [0x00000001, 0x007FFFFF]; subtracting one
    // maps that range to [0, 0x007FFFFE] and wraps zero around to 0xFFFFFFFF, so a single
    // unsigned comparison rejects zeros, normals, infinities and NaNs.
    code->movd(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, u32(0x7FFFFFFF));
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, u32(0x007FFFFE));
    code->ja(not_denormal);

    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_IDC)], u32(1 << 7));

    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
/**
 * Emits code that replaces a double-precision denormal held in xmm_value with zero
 * and, when it does so, writes (1 << 7) to JitState::FPSCR_IDC.
 *
 * @param code        The code block to emit into.
 * @param xmm_value   Register holding the value to check; zeroed in place if denormal.
 * @param gpr_scratch Scratch register; clobbered.
 */
static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
    using namespace Xbyak::util;
    Xbyak::Label not_denormal;

    // Constants provided by the code block, accessed as 64-bit operands.
    auto non_sign_mask = code->MFloatNonSignMask64();
    non_sign_mask.setBit(64);
    auto penultimate_denormal = code->MFloatPenultimatePositiveDenormal64();
    penultimate_denormal.setBit(64);

    // Same scheme as the 32-bit variant: strip the sign, subtract one, and do a single
    // unsigned comparison against the penultimate positive denormal.
    code->movq(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, non_sign_mask);
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, penultimate_denormal);
    code->ja(not_denormal);

    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_IDC)], u32(1 << 7));

    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
/**
 * Emits code that replaces a single-precision denormal result held in xmm_value with
 * zero and, when it does so, writes (1 << 3) to JitState::FPSCR_UFC.
 *
 * @param code        The code block to emit into.
 * @param xmm_value   Register holding the value to check; zeroed in place if denormal.
 * @param gpr_scratch Scratch register; clobbered.
 */
static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    using namespace Xbyak::util;
    Xbyak::Label not_denormal;

    // Strip the sign bit. Denormals then lie in [0x00000001, 0x007FFFFF]; subtracting one
    // maps that range to [0, 0x007FFFFE] and wraps zero around to 0xFFFFFFFF, so a single
    // unsigned comparison rejects everything that is not a denormal.
    code->movd(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, u32(0x7FFFFFFF));
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, u32(0x007FFFFE));
    code->ja(not_denormal);

    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_UFC)], u32(1 << 3));

    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
/**
 * Emits code that replaces a double-precision denormal result held in xmm_value with
 * zero and, when it does so, writes (1 << 3) to JitState::FPSCR_UFC.
 *
 * @param code        The code block to emit into.
 * @param xmm_value   Register holding the value to check; zeroed in place if denormal.
 * @param gpr_scratch Scratch register; clobbered.
 */
static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
    using namespace Xbyak::util;
    Xbyak::Label not_denormal;

    // Constants provided by the code block, accessed as 64-bit operands.
    auto non_sign_mask = code->MFloatNonSignMask64();
    non_sign_mask.setBit(64);
    auto penultimate_denormal = code->MFloatPenultimatePositiveDenormal64();
    penultimate_denormal.setBit(64);

    // Strip the sign, subtract one, and do a single unsigned comparison against the
    // penultimate positive denormal.
    code->movq(gpr_scratch, xmm_value);
    code->and_(gpr_scratch, non_sign_mask);
    code->sub(gpr_scratch, u32(1));
    code->cmp(gpr_scratch, penultimate_denormal);
    code->ja(not_denormal);

    code->pxor(xmm_value, xmm_value);
    code->mov(dword[r15 + offsetof(JitState, FPSCR_UFC)], u32(1 << 3));

    code->L(not_denormal);
}
|
|
|
|
|
|
|
|
/**
 * Emits code that replaces any single-precision NaN in xmm_value with the
 * constant provided by BlockOfCode::MFloatNaN32().
 *
 * @param code      The code block to emit into.
 * @param xmm_value Register holding the value to check; overwritten if it is a NaN.
 */
static void DefaultNaN32(BlockOfCode* code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Comparing a value with itself is unordered (PF set) only when it is a NaN.
    code->ucomiss(xmm_value, xmm_value);
    code->jnp(not_nan);

    code->movaps(xmm_value, code->MFloatNaN32());

    code->L(not_nan);
}
|
|
|
|
|
|
|
|
/**
 * Emits code that replaces any double-precision NaN in xmm_value with the
 * constant provided by BlockOfCode::MFloatNaN64().
 *
 * @param code      The code block to emit into.
 * @param xmm_value Register holding the value to check; overwritten if it is a NaN.
 */
static void DefaultNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Comparing a value with itself is unordered (PF set) only when it is a NaN.
    code->ucomisd(xmm_value, xmm_value);
    code->jnp(not_nan);

    code->movaps(xmm_value, code->MFloatNaN64());

    code->L(not_nan);
}
|
|
|
|
|
2016-08-26 15:23:08 +01:00
|
|
|
/**
 * Emits branch-free code that replaces a double-precision NaN in xmm_value with zero.
 *
 * @param code        The code block to emit into.
 * @param xmm_value   Register holding the value to check; modified in place.
 * @param xmm_scratch Scratch register; clobbered.
 */
static void ZeroIfNaN64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
    // Start from 0.0, which is itself ordered, so the comparison below depends only on xmm_value.
    code->pxor(xmm_scratch, xmm_scratch);
    // Low 64 bits become all-ones when xmm_value is ordered (i.e. not a NaN), all-zeros otherwise.
    code->cmpordsd(xmm_scratch, xmm_value);
    // Keep the value when the mask is set, clear it otherwise.
    code->pand(xmm_value, xmm_scratch);
}
|
|
|
|
|
|
|
|
// Shared emitter for single-precision operations of the form result = a <op> b.
// `fn` is the Xbyak instruction emitter to apply. Depending on the FTZ and DN bits of
// the block's FPSCR, the operation is wrapped with the denormal / flush-to-zero /
// default-NaN helper sequences.
static void FPThreeOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // The first argument's register doubles as the destination.
    Xbyak::Xmm dest = reg_alloc.UseDefXmm(lhs, inst);
    Xbyak::Xmm src = reg_alloc.UseXmm(rhs);
    Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, dest, tmp);
        DenormalsAreZero32(code, src, tmp);
    }

    (code->*fn)(dest, src);

    if (flush_to_zero) {
        FlushToZero32(code, dest, tmp);
    }
    if (default_nan) {
        DefaultNaN32(code, dest);
    }
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Shared emitter for double-precision operations of the form result = a <op> b.
// `fn` is the Xbyak instruction emitter to apply. Depending on the FTZ and DN bits of
// the block's FPSCR, the operation is wrapped with the denormal / flush-to-zero /
// default-NaN helper sequences.
static void FPThreeOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // The first argument's register doubles as the destination.
    Xbyak::Xmm dest = reg_alloc.UseDefXmm(lhs, inst);
    Xbyak::Xmm src = reg_alloc.UseXmm(rhs);
    Xbyak::Reg64 tmp = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, dest, tmp);
        DenormalsAreZero64(code, src, tmp);
    }

    (code->*fn)(dest, src);

    if (flush_to_zero) {
        FlushToZero64(code, dest, tmp);
    }
    if (default_nan) {
        DefaultNaN64(code, dest);
    }
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Shared emitter for single-precision operations of the form result = <op>(a).
// `fn` is invoked with the same register as destination and source. Depending on the
// FTZ and DN bits of the block's FPSCR, the operation is wrapped with the denormal /
// flush-to-zero / default-NaN helper sequences.
static void FPTwoOp32(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, dest, tmp);
    }

    (code->*fn)(dest, dest);

    if (flush_to_zero) {
        FlushToZero32(code, dest, tmp);
    }
    if (default_nan) {
        DefaultNaN32(code, dest);
    }
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Shared emitter for double-precision operations of the form result = <op>(a).
// `fn` is invoked with the same register as destination and source. Depending on the
// FTZ and DN bits of the block's FPSCR, the operation is wrapped with the denormal /
// flush-to-zero / default-NaN helper sequences.
static void FPTwoOp64(BlockOfCode* code, RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg64 tmp = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, dest, tmp);
    }

    (code->*fn)(dest, dest);

    if (flush_to_zero) {
        FlushToZero64(code, dest, tmp);
    }
    if (default_nan) {
        DefaultNaN64(code, dest);
    }
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Copies the low 32 bits of the argument's XMM register into a newly defined GPR.
void EmitX64::EmitTransferFromFP32(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg32 dest_gpr = reg_alloc.DefGpr(inst).cvt32();
    const Xbyak::Xmm src_xmm = reg_alloc.UseXmm(inst->GetArg(0));

    // TODO: Eliminate this.
    code->movd(dest_gpr, src_xmm);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Copies the low 64 bits of the argument's XMM register into a newly defined GPR.
void EmitX64::EmitTransferFromFP64(IR::Block&, IR::Inst* inst) {
    const Xbyak::Reg64 dest_gpr = reg_alloc.DefGpr(inst);
    const Xbyak::Xmm src_xmm = reg_alloc.UseXmm(inst->GetArg(0));

    // TODO: Eliminate this.
    code->movq(dest_gpr, src_xmm);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Moves a 32-bit integer value into a newly defined XMM register.
// An immediate zero is materialised with xorps instead of going through a GPR.
void EmitX64::EmitTransferToFP32(IR::Block&, IR::Inst* inst) {
    const bool is_zero_immediate = inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetU32() == 0;

    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    if (is_zero_immediate) {
        code->xorps(dest, dest);
        return;
    }

    Xbyak::Reg32 src = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();
    // TODO: Eliminate this.
    code->movd(dest, src);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Moves a 64-bit integer value into a newly defined XMM register.
// An immediate zero is materialised with xorpd instead of going through a GPR.
void EmitX64::EmitTransferToFP64(IR::Block&, IR::Inst* inst) {
    const bool is_zero_immediate = inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetU64() == 0;

    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    if (is_zero_immediate) {
        code->xorpd(dest, dest);
        return;
    }

    Xbyak::Reg64 src = reg_alloc.UseGpr(inst->GetArg(0));
    // TODO: Eliminate this.
    code->movq(dest, src);
}
|
|
|
|
|
2016-08-07 10:21:14 +01:00
|
|
|
// Single-precision absolute value: ANDs the operand in place with the
// MFloatNonSignMask32 constant.
void EmitX64::EmitFPAbs32(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);

    const auto non_sign_mask = code->MFloatNonSignMask32();
    code->pand(dest, non_sign_mask);
}
|
|
|
|
|
2016-08-07 10:21:14 +01:00
|
|
|
// Double-precision absolute value: ANDs the operand in place with the
// MFloatNonSignMask64 constant.
void EmitX64::EmitFPAbs64(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);

    const auto non_sign_mask = code->MFloatNonSignMask64();
    code->pand(dest, non_sign_mask);
}
|
|
|
|
|
2016-08-07 10:56:12 +01:00
|
|
|
// Single-precision negation: XORs the operand in place with the
// MFloatNegativeZero32 constant.
void EmitX64::EmitFPNeg32(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);

    const auto negative_zero = code->MFloatNegativeZero32();
    code->pxor(dest, negative_zero);
}
|
|
|
|
|
|
|
|
// Double-precision negation: XORs the operand in place with the
// MFloatNegativeZero64 constant.
void EmitX64::EmitFPNeg64(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);

    const auto negative_zero = code->MFloatNegativeZero64();
    code->pxor(dest, negative_zero);
}
|
|
|
|
|
2016-08-07 10:21:14 +01:00
|
|
|
// Single-precision addition: hands Xbyak's addss emitter to FPThreeOp32.
void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::addss;
    FPThreeOp32(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
|
|
|
// Double-precision addition: hands Xbyak's addsd emitter to FPThreeOp64.
void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::addsd;
    FPThreeOp64(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
2016-08-07 10:56:12 +01:00
|
|
|
// Single-precision division: hands Xbyak's divss emitter to FPThreeOp32.
void EmitX64::EmitFPDiv32(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::divss;
    FPThreeOp32(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
|
|
|
// Double-precision division: hands Xbyak's divsd emitter to FPThreeOp64.
void EmitX64::EmitFPDiv64(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::divsd;
    FPThreeOp64(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
2016-08-07 10:21:14 +01:00
|
|
|
// Single-precision multiplication: hands Xbyak's mulss emitter to FPThreeOp32.
void EmitX64::EmitFPMul32(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::mulss;
    FPThreeOp32(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
|
|
|
// Double-precision multiplication: hands Xbyak's mulsd emitter to FPThreeOp64.
void EmitX64::EmitFPMul64(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::mulsd;
    FPThreeOp64(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
|
|
|
// Single-precision square root: hands Xbyak's sqrtss emitter to FPTwoOp32.
void EmitX64::EmitFPSqrt32(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::sqrtss;
    FPTwoOp32(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
|
|
|
// Double-precision square root: hands Xbyak's sqrtsd emitter to FPTwoOp64.
void EmitX64::EmitFPSqrt64(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::sqrtsd;
    FPTwoOp64(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
|
|
|
// Single-precision subtraction: hands Xbyak's subss emitter to FPThreeOp32.
void EmitX64::EmitFPSub32(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::subss;
    FPThreeOp32(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
|
|
|
// Double-precision subtraction: hands Xbyak's subsd emitter to FPThreeOp64.
void EmitX64::EmitFPSub64(IR::Block& block, IR::Inst* inst) {
    using ScalarOp = void (Xbyak::CodeGenerator::*)(const Xbyak::Xmm&, const Xbyak::Operand&);
    const ScalarOp emit_op = &Xbyak::CodeGenerator::subsd;
    FPThreeOp64(code, reg_alloc, block, inst, emit_op);
}
|
|
|
|
|
2016-12-04 11:43:31 +00:00
|
|
|
// Translates the host flags left by a preceding (u)comis{s,d} into a value stored to
// JitState::FPSCR_nzcv. The flags are captured with lahf, then AH is compared against
// each expected pattern in turn and the matching constant is selected with cmove.
// Three GPRs are taken from the allocator, one of them pinned to RAX.
static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) {
    reg_alloc.ScratchGpr({HostLoc::RAX}); // lahf requires use of ah
    Xbyak::Reg32 candidate = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Reg32 nzcv = reg_alloc.ScratchGpr().cvt32();

    using namespace Xbyak::util;

    // AH pattern after lahf -> value to store, listed in the order the checks are emitted.
    struct FlagCase {
        u32 ah_pattern;
        u32 nzcv_value;
    };
    const FlagCase cases[] = {
        {0b01000111, 0x30000000},
        {0b00000010, 0x20000000},
        {0b00000011, 0x80000000},
        {0b01000010, 0x60000000},
    };

    code->lahf();
    for (const FlagCase& flag_case : cases) {
        code->mov(candidate, flag_case.nzcv_value);
        code->cmp(ah, flag_case.ah_pattern);
        code->cmove(nzcv, candidate);
    }
    code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], nzcv);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Single-precision compare. Argument 2 selects between ucomiss (quiet) and comiss;
// the resulting host flags are then converted by SetFpscrNzcvFromFlags.
void EmitX64::EmitFPCompare32(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);
    const bool quiet = inst->GetArg(2).GetU1();

    Xbyak::Xmm lhs_reg = reg_alloc.UseXmm(lhs);
    Xbyak::Xmm rhs_reg = reg_alloc.UseXmm(rhs);

    if (!quiet) {
        code->comiss(lhs_reg, rhs_reg);
    } else {
        code->ucomiss(lhs_reg, rhs_reg);
    }

    SetFpscrNzcvFromFlags(code, reg_alloc);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Double-precision compare. Argument 2 selects between ucomisd (quiet) and comisd;
// the resulting host flags are then converted by SetFpscrNzcvFromFlags.
void EmitX64::EmitFPCompare64(IR::Block&, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);
    const bool quiet = inst->GetArg(2).GetU1();

    Xbyak::Xmm lhs_reg = reg_alloc.UseXmm(lhs);
    Xbyak::Xmm rhs_reg = reg_alloc.UseXmm(rhs);

    if (!quiet) {
        code->comisd(lhs_reg, rhs_reg);
    } else {
        code->ucomisd(lhs_reg, rhs_reg);
    }

    SetFpscrNzcvFromFlags(code, reg_alloc);
}
|
|
|
|
|
2016-08-23 22:04:46 +01:00
|
|
|
// Widens a single to a double in place with cvtss2sd. The input is treated with the
// 32-bit denormal helper and the output with the 64-bit flush/default-NaN helpers,
// as selected by the FTZ and DN bits of the block's FPSCR.
void EmitX64::EmitFPSingleToDouble(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg64 tmp = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero32(code, dest, tmp.cvt32());
    }

    code->cvtss2sd(dest, dest);

    if (flush_to_zero) {
        FlushToZero64(code, dest, tmp);
    }
    if (default_nan) {
        DefaultNaN64(code, dest);
    }
}
|
|
|
|
|
|
|
|
// Narrows a double to a single in place with cvtsd2ss. The input is treated with the
// 64-bit denormal helper and the output with the 32-bit flush/default-NaN helpers,
// as selected by the FTZ and DN bits of the block's FPSCR.
void EmitX64::EmitFPDoubleToSingle(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);

    Xbyak::Xmm dest = reg_alloc.UseDefXmm(input, inst);
    Xbyak::Reg64 tmp = reg_alloc.ScratchGpr();

    const bool flush_to_zero = block.Location().FPSCR().FTZ();
    const bool default_nan = block.Location().FPSCR().DN();

    if (flush_to_zero) {
        DenormalsAreZero64(code, dest, tmp);
    }

    code->cvtsd2ss(dest, dest);

    if (flush_to_zero) {
        FlushToZero32(code, dest, tmp.cvt32());
    }
    if (default_nan) {
        DefaultNaN32(code, dest);
    }
}
|
|
|
|
|
|
|
|
// Converts a single to a signed 32-bit integer, leaving the integer in an XMM register.
// Argument 1 selects truncation (cvttsd2si) over the current rounding mode (cvtsd2si).
//
// ARM saturates on conversion, whereas x64 returns a sentinel value, so the value is
// first widened to double (lossless), which makes clamping to the s32 range possible.
void EmitX64::EmitFPSingleToS32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 int_tmp = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_tmp = reg_alloc.ScratchXmm();

    // Emits the double -> 32-bit integer conversion in the requested rounding flavour.
    const auto emit_convert = [&] {
        if (round_towards_zero) {
            code->cvttsd2si(int_tmp, value);
        } else {
            code->cvtsd2si(int_tmp, value);
        }
    };

    if (block.Location().FPSCR().FTZ()) {
        DenormalsAreZero32(code, value, int_tmp);
    }
    code->cvtss2sd(value, value);

    // The first conversion is performed only for its effect on the flags.
    emit_convert();

    // NaN becomes zero, then the value is clamped to the output range.
    ZeroIfNaN64(code, value, nan_tmp);
    code->minsd(value, code->MFloatMaxS32());
    code->maxsd(value, code->MFloatMinS32());

    // The second conversion produces the actual result.
    emit_convert();
    code->movd(dest, int_tmp);
}
|
|
|
|
|
|
|
|
// Converts a single to an unsigned 32-bit integer, leaving the integer in an XMM register.
//
// ARM saturates on conversion, whereas x64 returns a sentinel value. The value is
// widened to double (lossless) so that it can be clamped accurately. SSE2 has no
// unsigned conversion, so the value is shifted into the signed range, converted, and
// the integer result shifted back.
//
// FIXME: Inexact exception not correctly signalled with the below code
void EmitX64::EmitFPSingleToU32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 int_tmp = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_tmp = reg_alloc.ScratchXmm();

    constexpr u32 sign_bit = 2147483648u;

    // Shared prologue: optional denormal handling, widen to double, NaN -> 0.
    const auto emit_widen_input = [&] {
        if (block.Location().FPSCR().FTZ()) {
            DenormalsAreZero32(code, value, int_tmp);
        }
        code->cvtss2sd(value, value);
        ZeroIfNaN64(code, value, nan_tmp);
    };

    const bool use_current_rounding = block.Location().FPSCR().RMode() != Arm::FPSCR::RoundingMode::TowardsZero && !round_towards_zero;

    if (!use_current_rounding) {
        // Truncating path: only values above the signed maximum are shifted.
        Xbyak::Xmm shift_mask = reg_alloc.ScratchXmm();
        Xbyak::Reg32 int_shift = reg_alloc.ScratchGpr().cvt32();

        emit_widen_input();

        // Generate masks if out-of-signed-range
        code->movaps(shift_mask, code->MFloatMaxS32());
        code->cmpltsd(shift_mask, value);
        code->movd(int_shift, shift_mask);
        code->pand(shift_mask, code->MFloatMinS32());
        code->and_(int_shift, sign_bit);
        // Bring into range if necessary
        code->addsd(value, shift_mask);
        // First conversion is only there to set flags
        code->cvttsd2si(int_tmp, value);
        // Clamp to output range
        code->minsd(value, code->MFloatMaxS32());
        code->maxsd(value, code->MFloatMinU32());
        // Actually convert
        code->cvttsd2si(int_tmp, value);
        // Bring back into original range if necessary
        code->add(int_tmp, int_shift);
        code->movd(dest, int_tmp);
        return;
    }

    // Rounding path: the value is always shifted by the MFloatMinS32 constant.
    emit_widen_input();

    // Bring into SSE range
    code->addsd(value, code->MFloatMinS32());
    // First conversion is only there to set flags
    code->cvtsd2si(int_tmp, value);
    // Clamp to output range
    code->minsd(value, code->MFloatMaxS32());
    code->maxsd(value, code->MFloatMinS32());
    // Actually convert
    code->cvtsd2si(int_tmp, value);
    // Bring back into original range
    code->add(int_tmp, sign_bit);
    code->movd(dest, int_tmp);
}
|
|
|
|
|
|
|
|
// Converts a double to a signed 32-bit integer, leaving the integer in an XMM register.
// Argument 1 selects truncation (cvttsd2si) over the current rounding mode (cvtsd2si).
//
// ARM saturates on conversion, whereas x64 returns a sentinel value, so the input is
// clamped to the s32 range before the conversion that produces the result.
void EmitX64::EmitFPDoubleToS32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 int_tmp = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_tmp = reg_alloc.ScratchXmm();

    // Emits the double -> 32-bit integer conversion in the requested rounding flavour.
    const auto emit_convert = [&] {
        if (round_towards_zero) {
            code->cvttsd2si(int_tmp, value);
        } else {
            code->cvtsd2si(int_tmp, value);
        }
    };

    if (block.Location().FPSCR().FTZ()) {
        DenormalsAreZero64(code, value, int_tmp.cvt64());
    }

    // The first conversion is performed only for its effect on the flags.
    emit_convert();

    // NaN becomes zero, then the value is clamped to the output range.
    ZeroIfNaN64(code, value, nan_tmp);
    code->minsd(value, code->MFloatMaxS32());
    code->maxsd(value, code->MFloatMinS32());

    // The second conversion produces the actual result.
    emit_convert();
    code->movd(dest, int_tmp);
}
|
|
|
|
|
|
|
|
// Converts a double to an unsigned 32-bit integer, leaving the integer in an XMM register.
//
// ARM saturates on conversion, whereas x64 returns a sentinel value. The value is
// shifted into the signed range, clamped, converted, and the integer result shifted back.
//
// TODO: Use VCVTPD2UDQ when AVX512VL is available.
// FIXME: Inexact exception not correctly signalled with the below code
void EmitX64::EmitFPDoubleToU32(IR::Block& block, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_towards_zero = inst->GetArg(1).GetU1();

    Xbyak::Xmm value = reg_alloc.UseScratchXmm(input);
    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 int_tmp = reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm nan_tmp = reg_alloc.ScratchXmm();

    constexpr u32 sign_bit = 2147483648u;

    // Shared prologue: optional denormal handling, then NaN -> 0.
    const auto emit_prepare_input = [&] {
        if (block.Location().FPSCR().FTZ()) {
            DenormalsAreZero64(code, value, int_tmp.cvt64());
        }
        ZeroIfNaN64(code, value, nan_tmp);
    };

    const bool use_current_rounding = block.Location().FPSCR().RMode() != Arm::FPSCR::RoundingMode::TowardsZero && !round_towards_zero;

    if (!use_current_rounding) {
        // Truncating path: only values above the signed maximum are shifted.
        Xbyak::Xmm shift_mask = reg_alloc.ScratchXmm();
        Xbyak::Reg32 int_shift = reg_alloc.ScratchGpr().cvt32();

        emit_prepare_input();

        // Generate masks if out-of-signed-range
        code->movaps(shift_mask, code->MFloatMaxS32());
        code->cmpltsd(shift_mask, value);
        code->movd(int_shift, shift_mask);
        code->pand(shift_mask, code->MFloatMinS32());
        code->and_(int_shift, sign_bit);
        // Bring into range if necessary
        code->addsd(value, shift_mask);
        // First conversion is only there to set flags
        code->cvttsd2si(int_tmp, value);
        // Clamp to output range
        code->minsd(value, code->MFloatMaxS32());
        code->maxsd(value, code->MFloatMinU32());
        // Actually convert
        code->cvttsd2si(int_tmp, value);
        // Bring back into original range if necessary
        code->add(int_tmp, int_shift);
        code->movd(dest, int_tmp);
        return;
    }

    // Rounding path: the value is always shifted by the MFloatMinS32 constant.
    emit_prepare_input();

    // Bring into SSE range
    code->addsd(value, code->MFloatMinS32());
    // First conversion is only there to set flags
    code->cvtsd2si(int_tmp, value);
    // Clamp to output range
    code->minsd(value, code->MFloatMaxS32());
    code->maxsd(value, code->MFloatMinS32());
    // Actually convert
    code->cvtsd2si(int_tmp, value);
    // Bring back into original range
    code->add(int_tmp, sign_bit);
    code->movd(dest, int_tmp);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Converts a signed 32-bit integer held in an XMM register to a single.
// Argument 1 (round_to_nearest) must be false; the other mode is not implemented.
void EmitX64::EmitFPS32ToSingle(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm src = reg_alloc.UseXmm(input);
    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 int_tmp = reg_alloc.ScratchGpr().cvt32();

    // cvtsi2ss takes its integer operand from a GPR, so go through one.
    code->movd(int_tmp, src);
    code->cvtsi2ss(dest, int_tmp);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Converts an unsigned 32-bit integer held in an XMM register to a single.
// Argument 1 (round_to_nearest) must be false; the other mode is not implemented.
void EmitX64::EmitFPU32ToSingle(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm from = reg_alloc.UseXmm(a);
    Xbyak::Xmm to = reg_alloc.DefXmm(inst);
    // Use a 64-bit register to ensure we don't end up treating the input as signed
    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();

    // Only bits 0-31 of `from` carry the operand. A 64-bit movq would also copy bits
    // 32-63, which are not guaranteed to be zero and would corrupt the 64-bit
    // conversion below. Writing the 32-bit sub-register zero-extends into the full
    // 64-bit register, so the value is always interpreted as a non-negative integer.
    code->movd(gpr_scratch.cvt32(), from);
    code->cvtsi2ss(to, gpr_scratch);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Converts a signed 32-bit integer held in an XMM register to a double.
// Argument 1 (round_to_nearest) must be false; the other mode is not implemented.
void EmitX64::EmitFPS32ToDouble(IR::Block&, IR::Inst* inst) {
    IR::Value input = inst->GetArg(0);
    const bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm src = reg_alloc.UseXmm(input);
    Xbyak::Xmm dest = reg_alloc.DefXmm(inst);
    Xbyak::Reg32 int_tmp = reg_alloc.ScratchGpr().cvt32();

    // cvtsi2sd takes its integer operand from a GPR, so go through one.
    code->movd(int_tmp, src);
    code->cvtsi2sd(dest, int_tmp);
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Converts an unsigned 32-bit integer held in an XMM register to a double.
// Argument 1 (round_to_nearest) must be false; the other mode is not implemented.
void EmitX64::EmitFPU32ToDouble(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);
    bool round_to_nearest = inst->GetArg(1).GetU1();
    ASSERT_MSG(!round_to_nearest, "round_to_nearest unimplemented");

    Xbyak::Xmm from = reg_alloc.UseXmm(a);
    Xbyak::Xmm to = reg_alloc.DefXmm(inst);
    // Use a 64-bit register to ensure we don't end up treating the input as signed
    Xbyak::Reg64 gpr_scratch = reg_alloc.ScratchGpr();

    // Only bits 0-31 of `from` carry the operand. A 64-bit movq would also copy bits
    // 32-63, which are not guaranteed to be zero and would corrupt the 64-bit
    // conversion below. Writing the 32-bit sub-register zero-extends into the full
    // 64-bit register, so the value is always interpreted as a non-negative integer.
    code->movd(gpr_scratch.cvt32(), from);
    code->cvtsi2sd(to, gpr_scratch);
}
|
|
|
|
|
|
|
|
|
TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH, STREX, STREXB, STREXD, STREXH, SWP, SWPB
2016-08-09 22:48:20 +01:00
|
|
|
// Emits a byte store of 0 to JitState::exclusive_state.
void EmitX64::EmitClearExclusive(IR::Block&, IR::Inst*) {
    using namespace Xbyak::util;

    const Xbyak::Address exclusive_state = code->byte[r15 + offsetof(JitState, exclusive_state)];
    code->mov(exclusive_state, u8(0));
}
|
|
|
|
|
|
|
|
// Emits a byte store of 1 to JitState::exclusive_state and records the 32-bit address
// from argument 0 in JitState::exclusive_address. Argument 1 must be an immediate;
// its value is not otherwise used here.
void EmitX64::EmitSetExclusive(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    ASSERT(inst->GetArg(1).IsImmediate());
    const Xbyak::Reg32 guest_address = reg_alloc.UseGpr(inst->GetArg(0)).cvt32();

    const Xbyak::Address exclusive_state = code->byte[r15 + offsetof(JitState, exclusive_state)];
    const Xbyak::Address exclusive_address = dword[r15 + offsetof(JitState, exclusive_address)];

    code->mov(exclusive_state, u8(1));
    code->mov(exclusive_address, guest_address);
}
|
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
template <typename FunctionPointer>
|
|
|
|
static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
|
|
|
if (!cb.page_table) {
|
|
|
|
reg_alloc.HostCall(inst, inst->GetArg(0));
|
|
|
|
code->CallFunction(fn);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
using namespace Xbyak::util;
|
2016-07-11 22:43:53 +01:00
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
Xbyak::Reg64 result = reg_alloc.DefGpr(inst, { ABI_RETURN });
|
|
|
|
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
|
|
|
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
|
|
|
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
|
|
|
|
|
|
|
Xbyak::Label abort, end;
|
|
|
|
|
2016-12-05 16:29:36 +00:00
|
|
|
code->mov(rax, reinterpret_cast<u64>(cb.page_table));
|
2016-09-01 00:06:40 +01:00
|
|
|
code->mov(page_index.cvt32(), vaddr);
|
|
|
|
code->shr(page_index.cvt32(), 12);
|
|
|
|
code->mov(rax, qword[rax + page_index * 8]);
|
|
|
|
code->test(rax, rax);
|
|
|
|
code->jz(abort);
|
|
|
|
code->mov(page_offset.cvt32(), vaddr);
|
|
|
|
code->and_(page_offset.cvt32(), 4095);
|
|
|
|
switch (bit_size) {
|
|
|
|
case 8:
|
|
|
|
code->movzx(result, code->byte[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
code->movzx(result, word[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
code->mov(result.cvt32(), dword[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
code->mov(result.cvt64(), qword[rax + page_offset]);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT_MSG(false, "Invalid bit_size");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
code->jmp(end);
|
|
|
|
code->L(abort);
|
|
|
|
code->call(code->GetMemoryReadCallback(bit_size));
|
|
|
|
code->L(end);
|
2016-07-11 22:43:53 +01:00
|
|
|
}
|
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
template<typename FunctionPointer>
|
|
|
|
static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
|
|
|
if (!cb.page_table) {
|
|
|
|
reg_alloc.HostCall(inst, inst->GetArg(0), inst->GetArg(1));
|
|
|
|
code->CallFunction(fn);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
using namespace Xbyak::util;
|
2016-07-11 22:43:53 +01:00
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
reg_alloc.ScratchGpr({ HostLoc::RAX });
|
|
|
|
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
|
|
|
Xbyak::Reg64 value = reg_alloc.UseScratchGpr(inst->GetArg(1), { ABI_PARAM2 });
|
|
|
|
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
|
|
|
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
|
|
|
|
|
|
|
Xbyak::Label abort, end;
|
|
|
|
|
2016-12-05 16:29:36 +00:00
|
|
|
code->mov(rax, reinterpret_cast<u64>(cb.page_table));
|
2016-09-01 00:06:40 +01:00
|
|
|
code->mov(page_index.cvt32(), vaddr);
|
|
|
|
code->shr(page_index.cvt32(), 12);
|
|
|
|
code->mov(rax, qword[rax + page_index * 8]);
|
|
|
|
code->test(rax, rax);
|
|
|
|
code->jz(abort);
|
|
|
|
code->mov(page_offset.cvt32(), vaddr);
|
|
|
|
code->and_(page_offset.cvt32(), 4095);
|
|
|
|
switch (bit_size) {
|
|
|
|
case 8:
|
|
|
|
code->mov(code->byte[rax + page_offset], value.cvt8());
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
code->mov(word[rax + page_offset], value.cvt16());
|
|
|
|
break;
|
|
|
|
case 32:
|
|
|
|
code->mov(dword[rax + page_offset], value.cvt32());
|
|
|
|
break;
|
|
|
|
case 64:
|
|
|
|
code->mov(qword[rax + page_offset], value.cvt64());
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT_MSG(false, "Invalid bit_size");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
code->jmp(end);
|
|
|
|
code->L(abort);
|
|
|
|
code->call(code->GetMemoryWriteCallback(bit_size));
|
|
|
|
code->L(end);
|
2016-07-11 22:43:53 +01:00
|
|
|
}
|
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
// Emits an 8-bit guest memory read, falling back to the MemoryRead8 user callback.
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 8;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryRead8);
}
|
2016-07-11 22:43:53 +01:00
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
// Emits a 16-bit guest memory read, falling back to the MemoryRead16 user callback.
void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 16;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryRead16);
}
|
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
// Emits a 32-bit guest memory read, falling back to the MemoryRead32 user callback.
void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 32;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryRead32);
}
|
2016-07-11 22:43:53 +01:00
|
|
|
|
2016-09-01 00:06:40 +01:00
|
|
|
// Emits a 64-bit guest memory read, falling back to the MemoryRead64 user callback.
void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 64;
    ReadMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryRead64);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits an 8-bit guest memory write, falling back to the MemoryWrite8 user callback.
void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 8;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryWrite8);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 16-bit guest memory write, falling back to the MemoryWrite16 user callback.
void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 16;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryWrite16);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 32-bit guest memory write, falling back to the MemoryWrite32 user callback.
void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 32;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryWrite32);
}
|
|
|
|
|
2016-07-22 23:55:00 +01:00
|
|
|
// Emits a 64-bit guest memory write, falling back to the MemoryWrite64 user callback.
void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) {
    constexpr size_t bit_size = 64;
    WriteMemory(code, reg_alloc, inst, cb, bit_size, cb.MemoryWrite64);
}
|
|
|
|
|
2016-08-31 21:09:26 +01:00
|
|
|
template <typename FunctionPointer>
|
|
|
|
static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, FunctionPointer fn) {
|
2016-08-24 20:07:08 +01:00
|
|
|
using namespace Xbyak::util;
|
|
|
|
Xbyak::Label end;
|
|
|
|
|
TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH, STREX, STREXB, STREXD, STREXH, SWP, SWPB
2016-08-09 22:48:20 +01:00
|
|
|
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
|
2016-08-24 20:07:08 +01:00
|
|
|
Xbyak::Reg32 passed = reg_alloc.DefGpr(inst).cvt32();
|
|
|
|
Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers.
|
|
|
|
|
|
|
|
code->mov(passed, u32(1));
|
|
|
|
code->cmp(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
|
|
|
|
code->je(end);
|
|
|
|
code->mov(tmp, code->ABI_PARAM1);
|
|
|
|
code->xor_(tmp, dword[r15 + offsetof(JitState, exclusive_address)]);
|
|
|
|
code->test(tmp, JitState::RESERVATION_GRANULE_MASK);
|
|
|
|
code->jne(end);
|
|
|
|
code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
|
|
|
|
code->CallFunction(fn);
|
|
|
|
code->xor_(passed, passed);
|
|
|
|
code->L(end);
|
TranslateArm: Implement CLREX, LDREX, LDREXB, LDREXD, LDREXH, STREX, STREXB, STREXD, STREXH, SWP, SWPB
2016-08-09 22:48:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Emits an 8-bit exclusive store; the actual write goes through the MemoryWrite8 user callback.
void EmitX64::EmitExclusiveWriteMemory8(IR::Block&, IR::Inst* inst) {
    const auto write_fn = cb.MemoryWrite8;
    ExclusiveWrite(code, reg_alloc, inst, write_fn);
}
|
|
|
|
|
|
|
|
// Emits a 16-bit exclusive store; the actual write goes through the MemoryWrite16 user callback.
void EmitX64::EmitExclusiveWriteMemory16(IR::Block&, IR::Inst* inst) {
    const auto write_fn = cb.MemoryWrite16;
    ExclusiveWrite(code, reg_alloc, inst, write_fn);
}
|
|
|
|
|
|
|
|
// Emits a 32-bit exclusive store; the actual write goes through the MemoryWrite32 user callback.
void EmitX64::EmitExclusiveWriteMemory32(IR::Block&, IR::Inst* inst) {
    const auto write_fn = cb.MemoryWrite32;
    ExclusiveWrite(code, reg_alloc, inst, write_fn);
}
|
|
|
|
|
|
|
|
// Emits host code for a 64-bit exclusive store.
//
// Argument 0 is the address, argument 1 the low 32 bits and argument 2 the
// high 32 bits of the value. The result register is preset to 1; the two
// halves are combined and written via the MemoryWrite64 user callback - and
// the result cleared to 0 - only when JitState::exclusive_state is non-zero
// and the address matches JitState::exclusive_address under
// RESERVATION_GRANULE_MASK.
void EmitX64::EmitExclusiveWriteMemory64(IR::Block&, IR::Inst* inst) {
    using namespace Xbyak::util;

    reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
    const Xbyak::Reg32 status = reg_alloc.DefGpr(inst).cvt32();
    const Xbyak::Reg64 high_half = reg_alloc.UseScratchGpr(inst->GetArg(2));
    const Xbyak::Reg64 combined = code->ABI_PARAM2;
    const Xbyak::Reg32 scratch = code->ABI_RETURN.cvt32(); // Use one of the unused HostCall registers.

    Xbyak::Label done;

    code->mov(status, u32(1));

    // No reservation held: skip the write.
    code->cmp(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
    code->je(done);

    // Reservation is for a different granule: skip the write.
    code->mov(scratch, code->ABI_PARAM1);
    code->xor_(scratch, dword[r15 + offsetof(JitState, exclusive_address)]);
    code->test(scratch, JitState::RESERVATION_GRANULE_MASK);
    code->jne(done);

    // Consume the reservation.
    code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));

    // combined = (high_half << 32) | zero_extend(low half)
    code->mov(combined.cvt32(), combined.cvt32()); // zero extend to 64-bits
    code->shl(high_half, 32);
    code->or_(combined, high_half);

    code->CallFunction(cb.MemoryWrite64);
    code->xor_(status, status);

    code->L(done);
}
|
2016-07-11 22:43:53 +01:00
|
|
|
|
2016-07-04 14:37:50 +01:00
|
|
|
void EmitX64::EmitAddCycles(size_t cycles) {
|
2016-08-24 20:07:08 +01:00
|
|
|
using namespace Xbyak::util;
|
2016-07-04 14:37:50 +01:00
|
|
|
ASSERT(cycles < std::numeric_limits<u32>::max());
|
2016-08-24 20:07:08 +01:00
|
|
|
code->sub(qword[r15 + offsetof(JitState, cycles_remaining)], static_cast<u32>(cycles));
|
2016-07-07 10:53:09 +01:00
|
|
|
}
|
|
|
|
|
2016-08-24 20:07:08 +01:00
|
|
|
// Emits a test of the ARM condition `cond` against the guest CPSR.
//
// The emitted code loads the CPSR into eax and jumps to the returned label
// when the condition holds; otherwise execution falls through. The returned
// label is not bound here - the caller has to place it with code->L().
// eax is always clobbered; ebx and esi are additionally clobbered for GT/LE.
static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) {
    using namespace Xbyak::util;

    // Positions of the N, Z, C and V flags within the CPSR.
    constexpr size_t n_shift = 31;
    constexpr size_t z_shift = 30;
    constexpr size_t c_shift = 29;
    constexpr size_t v_shift = 28;
    constexpr u32 n_mask = 1u << n_shift;
    constexpr u32 z_mask = 1u << z_shift;
    constexpr u32 c_mask = 1u << c_shift;
    constexpr u32 v_mask = 1u << v_shift;

    Xbyak::Label label;

    const Xbyak::Reg32 cpsr = eax;
    code->mov(cpsr, MJitStateCpsr());

    // Jump to `label` when any bit of `mask` is set in the CPSR.
    const auto jump_if_set = [&](u32 mask) {
        code->test(cpsr, mask);
        code->jnz(label);
    };
    // Jump to `label` when no bit of `mask` is set in the CPSR.
    const auto jump_if_clear = [&](u32 mask) {
        code->test(cpsr, mask);
        code->jz(label);
    };
    // Host flags compare equal afterwards iff C is set and Z is clear.
    const auto compare_c_without_z = [&] {
        code->and_(cpsr, z_mask | c_mask);
        code->cmp(cpsr, c_mask);
    };
    // Tests bit 0 of z | (n ^ v); host ZF is set afterwards iff !z && n == v.
    const auto test_z_or_n_differs_v = [&] {
        const Xbyak::Reg32 n_bits = ebx;
        const Xbyak::Reg32 v_bits = esi;
        code->mov(n_bits, cpsr);
        code->mov(v_bits, cpsr);
        code->shr(n_bits, n_shift);
        code->shr(v_bits, v_shift);
        code->shr(cpsr, z_shift);
        code->xor_(n_bits, v_bits);
        code->or_(n_bits, cpsr);
        code->test(n_bits, 1);
    };

    switch (cond) {
    case Arm::Cond::EQ: // z
        jump_if_set(z_mask);
        break;
    case Arm::Cond::NE: // !z
        jump_if_clear(z_mask);
        break;
    case Arm::Cond::CS: // c
        jump_if_set(c_mask);
        break;
    case Arm::Cond::CC: // !c
        jump_if_clear(c_mask);
        break;
    case Arm::Cond::MI: // n
        jump_if_set(n_mask);
        break;
    case Arm::Cond::PL: // !n
        jump_if_clear(n_mask);
        break;
    case Arm::Cond::VS: // v
        jump_if_set(v_mask);
        break;
    case Arm::Cond::VC: // !v
        jump_if_clear(v_mask);
        break;
    case Arm::Cond::HI: // c & !z
        compare_c_without_z();
        code->je(label);
        break;
    case Arm::Cond::LS: // !c | z
        compare_c_without_z();
        code->jne(label);
        break;
    case Arm::Cond::GE: // n == v
        // Passes when both flags are clear or both are set.
        code->and_(cpsr, n_mask | v_mask);
        code->jz(label);
        code->cmp(cpsr, n_mask | v_mask);
        code->je(label);
        break;
    case Arm::Cond::LT: { // n != v
        // Fails when both flags are clear or both are set.
        Xbyak::Label fail;
        code->and_(cpsr, n_mask | v_mask);
        code->jz(fail);
        code->cmp(cpsr, n_mask | v_mask);
        code->jne(label);
        code->L(fail);
        break;
    }
    case Arm::Cond::GT: // !z & (n == v)
        test_z_or_n_differs_v();
        code->jz(label);
        break;
    case Arm::Cond::LE: // z | (n != v)
        test_z_or_n_differs_v();
        code->jnz(label);
        break;
    default:
        ASSERT_MSG(false, "Unknown cond %zu", static_cast<size_t>(cond));
        break;
    }

    return label;
}
|
|
|
|
|
2016-08-18 18:16:18 +01:00
|
|
|
// Emits the conditional-execution prelude of a block.
//
// Blocks with condition AL need no prelude (and must not carry a
// condition-failed location). For any other condition, code is emitted that
// checks the condition; when it fails, the condition-failed cycle count is
// charged and control links to the block's condition-failed location.
void EmitX64::EmitCondPrelude(const IR::Block& block) {
    const bool unconditional = block.GetCondition() == Arm::Cond::AL;
    if (unconditional) {
        ASSERT(!block.HasConditionFailedLocation());
        return;
    }

    ASSERT(block.HasConditionFailedLocation());

    // Condition holds -> jump over the failure path emitted below.
    Xbyak::Label condition_passed = EmitCond(code, block.GetCondition());

    EmitAddCycles(block.ConditionFailedCycleCount());
    EmitTerminalLinkBlock(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());

    code->L(condition_passed);
}
|
|
|
|
|
2016-09-05 11:54:09 +01:00
|
|
|
// Dispatches on the alternative currently held by `terminal` and emits the
// matching terminal. The numeric cases are indices into the IR::Terminal
// variant (as returned by which()); any other index is treated as invalid.
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
    const auto alternative = terminal.which();

    switch (alternative) {
    case 1:
        EmitTerminalInterpret(boost::get<IR::Term::Interpret>(terminal), initial_location);
        break;
    case 2:
        EmitTerminalReturnToDispatch(boost::get<IR::Term::ReturnToDispatch>(terminal), initial_location);
        break;
    case 3:
        EmitTerminalLinkBlock(boost::get<IR::Term::LinkBlock>(terminal), initial_location);
        break;
    case 4:
        EmitTerminalLinkBlockFast(boost::get<IR::Term::LinkBlockFast>(terminal), initial_location);
        break;
    case 5:
        EmitTerminalPopRSBHint(boost::get<IR::Term::PopRSBHint>(terminal), initial_location);
        break;
    case 6:
        EmitTerminalIf(boost::get<IR::Term::If>(terminal), initial_location);
        break;
    case 7:
        EmitTerminalCheckHalt(boost::get<IR::Term::CheckHalt>(terminal), initial_location);
        break;
    default:
        ASSERT_MSG(false, "Invalid Terminal. Bad programmer.");
        break;
    }
}
|
|
|
|
|
2016-09-05 11:54:09 +01:00
|
|
|
// Emits a terminal that hands execution to the user's interpreter fallback.
//
// The guest PC is set to the terminal's target, then InterpreterFallback is
// called with (pc, jit_interface, user_arg) and the run loop is exited.
// Changing the T or E flag across this terminal is not implemented.
void EmitX64::EmitTerminalInterpret(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
    ASSERT_MSG(terminal.next.TFlag() == initial_location.TFlag(), "Unimplemented");
    ASSERT_MSG(terminal.next.EFlag() == initial_location.EFlag(), "Unimplemented");

    const auto pc_param = code->ABI_PARAM1.cvt32();

    // Set up the three callback arguments.
    code->mov(pc_param, terminal.next.PC());
    code->mov(code->ABI_PARAM2, reinterpret_cast<u64>(jit_interface));
    code->mov(code->ABI_PARAM3, reinterpret_cast<u64>(cb.user_arg));

    // Publish the target PC to the guest state before leaving JIT code.
    code->mov(MJitStateReg(Arm::Reg::PC), pc_param);

    code->SwitchMxcsrOnExit();
    code->CallFunction(cb.InterpreterFallback);
    code->ReturnFromRunCode(false); // TODO: Check cycles
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Emits a terminal that simply leaves the run loop; both arguments are unused.
void EmitX64::EmitTerminalReturnToDispatch(IR::Term::ReturnToDispatch, IR::LocationDescriptor) {
    code->ReturnFromRunCode();
}
|
|
|
|
|
2016-09-05 11:54:09 +01:00
|
|
|
// Emits a terminal that links to `terminal.next` while cycles remain.
//
// CPSR bit 5 (T) and bit 9 (E) are updated when they differ between the
// current and the next location. A patchable `jg` site is then recorded for
// the target: it jumps straight to the target block when it is already
// compiled. The code after the patch site stores the target PC and returns
// from the run loop.
void EmitX64::EmitTerminalLinkBlock(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    constexpr u32 cpsr_t_bit = 1u << 5;
    constexpr u32 cpsr_e_bit = 1u << 9;

    const auto next_t = terminal.next.TFlag();
    if (next_t != initial_location.TFlag()) {
        if (next_t) {
            code->or_(MJitStateCpsr(), cpsr_t_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_t_bit);
        }
    }

    const auto next_e = terminal.next.EFlag();
    if (next_e != initial_location.EFlag()) {
        if (next_e) {
            code->or_(MJitStateCpsr(), cpsr_e_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_e_bit);
        }
    }

    // The patched jg below is taken only while cycles_remaining > 0.
    code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0);

    // Remember this site so it can be (un)patched when the target changes.
    patch_information[terminal.next.UniqueHash()].jg.emplace_back(code->getCurr());
    if (auto target_block = GetBasicBlock(terminal.next)) {
        EmitPatchJg(target_block->code_ptr);
    } else {
        EmitPatchJg();
    }

    code->mov(MJitStateReg(Arm::Reg::PC), terminal.next.PC());
    code->ReturnFromRunCode(); // TODO: Check cycles, Properly do a link
}
|
|
|
|
|
2016-09-05 11:54:09 +01:00
|
|
|
// Emits a terminal that links to `terminal.next` unconditionally.
//
// CPSR bit 5 (T) and bit 9 (E) are updated when they differ between the
// current and the next location. A patchable `jmp` site is then recorded for
// the target and emitted, pointing at the target block when it is already
// compiled.
void EmitX64::EmitTerminalLinkBlockFast(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    constexpr u32 cpsr_t_bit = 1u << 5;
    constexpr u32 cpsr_e_bit = 1u << 9;

    const auto next_t = terminal.next.TFlag();
    if (next_t != initial_location.TFlag()) {
        if (next_t) {
            code->or_(MJitStateCpsr(), cpsr_t_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_t_bit);
        }
    }

    const auto next_e = terminal.next.EFlag();
    if (next_e != initial_location.EFlag()) {
        if (next_e) {
            code->or_(MJitStateCpsr(), cpsr_e_bit);
        } else {
            code->and_(MJitStateCpsr(), ~cpsr_e_bit);
        }
    }

    // Remember this site so it can be (un)patched when the target changes.
    patch_information[terminal.next.UniqueHash()].jmp.emplace_back(code->getCurr());
    if (auto target_block = GetBasicBlock(terminal.next)) {
        EmitPatchJmp(terminal.next, target_block->code_ptr);
    } else {
        EmitPatchJmp(terminal.next);
    }
}
|
|
|
|
|
2016-12-15 20:47:43 +00:00
|
|
|
// Emits a terminal that consults the return stack buffer (RSB).
//
// A 64-bit key is built from the current guest state and compared against
// every entry of JitState::rsb_location_descriptors; on a match the
// corresponding rsb_codeptrs entry becomes the jump target. With no match the
// jump goes to the return-from-run-code address. Clobbers rax, rbx and rcx.
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor) {
    using namespace Xbyak::util;

    const Xbyak::Reg64 target = rax;
    const Xbyak::Reg64 key = rbx;
    const Xbyak::Reg32 key_lo = ebx;

    // This calculation has to match up with IREmitter::PushRSB
    // key = (((cpsr & (T | E)) >> 2) | FPSCR_mode) << 32 | pc
    code->mov(key_lo, MJitStateCpsr());
    code->mov(ecx, MJitStateReg(Arm::Reg::PC));
    code->and_(key_lo, u32((1 << 5) | (1 << 9)));
    code->shr(key_lo, 2);
    code->or_(key_lo, dword[r15 + offsetof(JitState, FPSCR_mode)]);
    code->shl(key, 32);
    code->or_(key, rcx);

    // Default target when no RSB entry matches.
    code->mov(target, reinterpret_cast<u64>(code->GetReturnFromRunCodeAddress()));

    // Unrolled search: one compare + conditional move per RSB slot.
    for (size_t slot = 0; slot < JitState::RSBSize; ++slot) {
        const size_t slot_offset = slot * sizeof(u64);
        code->cmp(key, qword[r15 + offsetof(JitState, rsb_location_descriptors) + slot_offset]);
        code->cmove(target, qword[r15 + offsetof(JitState, rsb_codeptrs) + slot_offset]);
    }

    code->jmp(target);
}
|
|
|
|
|
2016-09-05 11:54:09 +01:00
|
|
|
// Emits a conditional terminal: the `else_` terminal is laid out first as the
// fall-through path, and the `then_` terminal follows at the label that is
// jumped to when the condition `if_` holds.
void EmitX64::EmitTerminalIf(IR::Term::If terminal, IR::LocationDescriptor initial_location) {
    Xbyak::Label condition_passed = EmitCond(code, terminal.if_);

    // Condition failed: fall through into the else branch.
    EmitTerminal(terminal.else_, initial_location);

    // Condition passed.
    code->L(condition_passed);
    EmitTerminal(terminal.then_, initial_location);
}
|
|
|
|
|
2016-09-05 11:54:09 +01:00
|
|
|
// Emits a check of JitState::halt_requested: when it is non-zero, control
// jumps to the return-from-run-code address; otherwise the nested `else_`
// terminal is executed.
void EmitX64::EmitTerminalCheckHalt(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) {
    using namespace Xbyak::util;

    const auto halt_flag = code->byte[r15 + offsetof(JitState, halt_requested)];
    code->cmp(halt_flag, u8(0));
    code->jne(code->GetReturnFromRunCodeAddress());

    EmitTerminal(terminal.else_, initial_location);
}
|
|
|
|
|
2016-12-19 15:01:49 +00:00
|
|
|
// Rewrites every recorded patch site that targets `desc` so that it points at
// `bb` (a null `bb` is forwarded to the per-site emitters unchanged). The
// emitter's write position is saved on entry and restored afterwards.
void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
    const CodePtr resume_at = code->getCurr();
    const PatchInformation& sites = patch_information[desc.UniqueHash()];

    // Conditional (cycles-remaining) links.
    for (CodePtr site : sites.jg) {
        code->SetCodePtr(site);
        EmitPatchJg(bb);
    }

    // Unconditional links.
    for (CodePtr site : sites.jmp) {
        code->SetCodePtr(site);
        EmitPatchJmp(desc, bb);
    }

    // Code pointers materialised into rcx.
    for (CodePtr site : sites.mov_rcx) {
        code->SetCodePtr(site);
        EmitPatchMovRcx(bb);
    }

    code->SetCodePtr(resume_at);
}
|
|
|
|
|
2016-12-19 15:01:49 +00:00
|
|
|
// Re-patches every site that targets `desc` with a null code pointer, i.e. the
// "no compiled block" form of each patch.
void EmitX64::Unpatch(const IR::LocationDescriptor& desc) {
    constexpr CodePtr no_block = nullptr;
    Patch(desc, no_block);
}
|
|
|
|
|
|
|
|
// Emits a patchable `jg` site at the current position. With a null target no
// jump is emitted. In both cases EnsurePatchLocationSize is asked for a site
// size of 6 bytes.
void EmitX64::EmitPatchJg(CodePtr target_code_ptr) {
    constexpr size_t patch_size = 6;
    const CodePtr site_begin = code->getCurr();

    if (target_code_ptr) {
        code->jg(target_code_ptr);
    }

    code->EnsurePatchLocationSize(site_begin, patch_size);
}
|
|
|
|
|
|
|
|
// Emits a patchable `jmp` site at the current position. With a target the
// site is a direct jump to it; with a null target the site stores the
// target's PC into the guest state and jumps to the return-from-run-code
// address. In both cases EnsurePatchLocationSize is asked for a site size of
// 13 bytes.
void EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
    constexpr size_t patch_size = 13;
    const CodePtr site_begin = code->getCurr();

    if (!target_code_ptr) {
        // Target not compiled: record where to continue and leave JIT code.
        code->mov(MJitStateReg(Arm::Reg::PC), target_desc.PC());
        code->jmp(code->GetReturnFromRunCodeAddress());
    } else {
        code->jmp(target_code_ptr);
    }

    code->EnsurePatchLocationSize(site_begin, patch_size);
}
|
|
|
|
|
|
|
|
// Emits a patchable `mov rcx, imm64` site that loads a code pointer. A null
// target is replaced by the return-from-run-code address.
// EnsurePatchLocationSize is asked for a site size of 10 bytes.
void EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
    constexpr size_t patch_size = 10;

    if (!target_code_ptr) {
        target_code_ptr = code->GetReturnFromRunCodeAddress();
    }

    const CodePtr site_begin = code->getCurr();
    const u64 target_address = reinterpret_cast<u64>(target_code_ptr);
    code->mov(code->rcx, target_address);
    code->EnsurePatchLocationSize(site_begin, patch_size);
}
|
|
|
|
|
2016-07-07 12:01:47 +01:00
|
|
|
void EmitX64::ClearCache() {
|
2016-12-19 15:01:49 +00:00
|
|
|
block_descriptors.clear();
|
|
|
|
patch_information.clear();
|
2016-07-07 12:01:47 +01:00
|
|
|
}
|
|
|
|
|
2016-07-01 14:01:06 +01:00
|
|
|
} // namespace BackendX64
|
|
|
|
} // namespace Dynarmic
|