A64: Implement STXP, STLXP, LDXP, LDAXP
This commit is contained in:
parent
2a6619d59c
commit
8698f057d0
6 changed files with 94 additions and 14 deletions
|
@ -28,6 +28,25 @@ void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, lo);
|
ctx.reg_alloc.DefineValue(inst, lo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits x64 code for the Pack2x64To1x128 IR instruction: takes the two
// arguments of `inst` in 64-bit general-purpose registers and combines them
// into a single XMM register, args[0] in the low quadword and args[1] in the
// high quadword. The XMM register is then bound as the value of `inst`.
void EmitX64::EmitPack2x64To1x128(EmitContext& ctx, IR::Inst* inst) {
|
||||||

auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||

Xbyak::Reg64 lo = ctx.reg_alloc.UseGpr(args[0]);
|
||||||

Xbyak::Reg64 hi = ctx.reg_alloc.UseGpr(args[1]);
|
||||||

Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||

|
||||||

// SSE4.1 path: PINSRQ can write the high quadword directly from a GPR, so no
// second XMM register is needed.
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||||

code.movq(result, lo);
|
||||||

code.pinsrq(result, hi, 1);
|
||||||

} else {
|
||||||

// Fallback path: move `hi` into a temporary XMM register and use PUNPCKLQDQ
// to place its low quadword into the high quadword of `result`.
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||

code.movq(result, lo);
|
||||||

code.movq(tmp, hi);
|
||||||

code.punpcklqdq(result, tmp);
|
||||||

}
|
||||||

|
||||||

ctx.reg_alloc.DefineValue(inst, result);
|
||||||

}
|
||||||
|
|
||||||
void EmitX64::EmitLeastSignificantWord(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitLeastSignificantWord(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
ctx.reg_alloc.DefineValue(inst, args[0]);
|
ctx.reg_alloc.DefineValue(inst, args[0]);
|
||||||
|
|
|
@ -137,12 +137,12 @@ INST(LDx_mult_2, "LDx (multiple structures)", "0Q001
|
||||||
// Loads and stores - Load/Store Exclusive
|
// Loads and stores - Load/Store Exclusive
|
||||||
INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt")
|
INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt")
|
||||||
INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt")
|
INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt")
|
||||||
//INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt")
|
INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt")
|
||||||
//INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt")
|
INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt")
|
||||||
INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt")
|
INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt")
|
||||||
INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt")
|
INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt")
|
||||||
//INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt")
|
INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt")
|
||||||
//INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt")
|
INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt")
|
||||||
INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt")
|
INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt")
|
||||||
INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt")
|
INST(STLR, "STLRB, STLRH, STLR", "zz00100010011111111111nnnnnttttt")
|
||||||
INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt")
|
INST(LDLAR, "LDLARB, LDLARH, LDLAR", "zz00100011011111011111nnnnnttttt")
|
||||||
|
|
|
@ -10,20 +10,24 @@
|
||||||
|
|
||||||
namespace Dynarmic::A64 {
|
namespace Dynarmic::A64 {
|
||||||
|
|
||||||
static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, size_t size, bool L, bool o0, boost::optional<Reg> Rs, Reg Rn, Reg Rt) {
|
static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, bool pair, size_t size, bool L, bool o0, boost::optional<Reg> Rs, boost::optional<Reg> Rt2, Reg Rn, Reg Rt) {
|
||||||
// Shared Decode
|
// Shared Decode
|
||||||
|
|
||||||
const AccType acctype = o0 ? AccType::ORDERED : AccType::ATOMIC;
|
const AccType acctype = o0 ? AccType::ORDERED : AccType::ATOMIC;
|
||||||
const MemOp memop = L ? MemOp::LOAD : MemOp::STORE;
|
const MemOp memop = L ? MemOp::LOAD : MemOp::STORE;
|
||||||
const size_t elsize = 8 << size;
|
const size_t elsize = 8 << size;
|
||||||
const size_t regsize = elsize == 64 ? 64 : 32;
|
const size_t regsize = elsize == 64 ? 64 : 32;
|
||||||
const size_t datasize = elsize;
|
const size_t datasize = pair ? elsize * 2 : elsize;
|
||||||
|
|
||||||
// Operation
|
// Operation
|
||||||
|
|
||||||
const size_t dbytes = datasize / 8;
|
const size_t dbytes = datasize / 8;
|
||||||
|
|
||||||
if (memop == MemOp::STORE && *Rs == Rn && Rn != Reg::R31) {
|
if (memop == MemOp::LOAD && pair && Rt == *Rt2) {
|
||||||
|
return tv.UnpredictableInstruction();
|
||||||
|
} else if (memop == MemOp::STORE && (*Rs == Rt || (pair && *Rs == *Rt2))) {
|
||||||
|
return tv.UnpredictableInstruction();
|
||||||
|
} else if (memop == MemOp::STORE && *Rs == Rn && Rn != Reg::R31) {
|
||||||
return tv.UnpredictableInstruction();
|
return tv.UnpredictableInstruction();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,15 +41,30 @@ static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter&
|
||||||
|
|
||||||
switch (memop) {
|
switch (memop) {
|
||||||
case MemOp::STORE: {
|
case MemOp::STORE: {
|
||||||
IR::UAny data = tv.X(datasize, Rt);
|
IR::UAnyU128 data;
|
||||||
|
if (pair && elsize == 64) {
|
||||||
|
data = ir.Pack2x64To1x128(tv.X(64, Rt), tv.X(64, *Rt2));
|
||||||
|
} else if (pair && elsize == 32) {
|
||||||
|
data = ir.Pack2x32To1x64(tv.X(32, Rt), tv.X(32, *Rt2));
|
||||||
|
} else {
|
||||||
|
data = tv.X(datasize, Rt);
|
||||||
|
}
|
||||||
IR::U32 status = tv.ExclusiveMem(address, dbytes, acctype, data);
|
IR::U32 status = tv.ExclusiveMem(address, dbytes, acctype, data);
|
||||||
tv.X(32, *Rs, status);
|
tv.X(32, *Rs, status);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MemOp::LOAD: {
|
case MemOp::LOAD: {
|
||||||
ir.SetExclusive(address, dbytes);
|
ir.SetExclusive(address, dbytes);
|
||||||
IR::UAny data = tv.Mem(address, dbytes, acctype);
|
IR::UAnyU128 data = tv.Mem(address, dbytes, acctype);
|
||||||
tv.X(regsize, Rt, tv.ZeroExtend(data, regsize));
|
if (pair && elsize == 64) {
|
||||||
|
tv.X(64, Rt, ir.VectorGetElement(64, data, 0));
|
||||||
|
tv.X(64, *Rt2, ir.VectorGetElement(64, data, 1));
|
||||||
|
} else if (pair && elsize == 32) {
|
||||||
|
tv.X(32, Rt, ir.LeastSignificantWord(data));
|
||||||
|
tv.X(32, *Rt2, ir.MostSignificantWord(data).result);
|
||||||
|
} else {
|
||||||
|
tv.X(regsize, Rt, tv.ZeroExtend(data, regsize));
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -56,31 +75,67 @@ static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter&
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator entry point for STXRB / STXRH / STXR.
// Forwards to ExclusiveSharedDecodeAndOperation as a non-pair store (L = 0)
// with o0 = 0, which that helper maps to AccType::ATOMIC. `sz` is zero-extended
// into the helper's `size` argument. Returns the helper's result.
// Both sides of the diff are shown below: the earlier call has no pair/Rt2
// arguments, the later call passes pair = false and an empty Rt2.
bool TranslatorVisitor::STXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
|
bool TranslatorVisitor::STXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
|
||||||

const bool pair = false;
|
||||||
const size_t size = sz.ZeroExtend<size_t>();
|
const size_t size = sz.ZeroExtend<size_t>();
|
||||||
const bool L = 0;
|
const bool L = 0;
|
||||||
const bool o0 = 0;
|
const bool o0 = 0;
|
||||||
return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt);
|
return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, {}, Rn, Rt);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator entry point for STLXRB / STLXRH / STLXR.
// Same as STXR except o0 = 1, which ExclusiveSharedDecodeAndOperation maps to
// AccType::ORDERED. Non-pair store (pair = false, L = 0); returns the helper's
// result.
// Both sides of the diff are shown below: the earlier call has no pair/Rt2
// arguments, the later call passes pair = false and an empty Rt2.
bool TranslatorVisitor::STLXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
|
bool TranslatorVisitor::STLXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
|
||||||

const bool pair = false;
|
||||||
const size_t size = sz.ZeroExtend<size_t>();
|
const size_t size = sz.ZeroExtend<size_t>();
|
||||||
const bool L = 0;
|
const bool L = 0;
|
||||||
const bool o0 = 1;
|
const bool o0 = 1;
|
||||||
return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt);
|
return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, {}, Rn, Rt);
|
||||||

}
|
||||||
|
|
||||||
|
// Translator entry point for STXP (store exclusive pair).
// Forwards to ExclusiveSharedDecodeAndOperation with pair = true, L = 0
// (store) and o0 = 0 (AccType::ATOMIC in that helper). Rt and Rt2 supply the
// two data registers; Rs receives the 32-bit status written by the helper.
// The helper reports the instruction as unpredictable when Rs equals Rt or
// Rt2, or when Rs equals Rn and Rn is not R31.
bool TranslatorVisitor::STXP(Imm<1> sz, Reg Rs, Reg Rt2, Reg Rn, Reg Rt) {
|
||||||

const bool pair = true;
|
||||||

// A constant 1 bit is combined with the 1-bit `sz` field to form the helper's
// `size` argument. NOTE(review): presumably the constant is the upper bit,
// matching the leading "1z" of the decoder pattern, giving size 2 or 3
// (element size 32 or 64 bits via 8 << size) - confirm against concatenate().
const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend<size_t>();
|
||||||

const bool L = 0;
|
||||||

const bool o0 = 0;
|
||||||

return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, Rt2, Rn, Rt);
|
||||||

}
|
||||||
|
|
||||||
|
// Translator entry point for STLXP (store exclusive pair, ordered variant).
// Same as STXP except o0 = 1, which ExclusiveSharedDecodeAndOperation maps to
// AccType::ORDERED. pair = true, L = 0 (store); returns the helper's result.
bool TranslatorVisitor::STLXP(Imm<1> sz, Reg Rs, Reg Rt2, Reg Rn, Reg Rt) {
|
||||||

const bool pair = true;
|
||||||

// NOTE(review): presumably yields size 2 or 3 (constant 1 bit above `sz`),
// matching the leading "1z" of the decoder pattern - confirm against
// concatenate().
const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend<size_t>();
|
||||||

const bool L = 0;
|
||||||

const bool o0 = 1;
|
||||||

return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, Rs, Rt2, Rn, Rt);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator entry point for LDXRB / LDXRH / LDXR.
// Forwards to ExclusiveSharedDecodeAndOperation as a non-pair load (L = 1)
// with o0 = 0, which that helper maps to AccType::ATOMIC. No status register
// is involved for loads, so Rs is passed empty.
// Both sides of the diff are shown below: the earlier call has no pair/Rt2
// arguments, the later call passes pair = false and an empty Rt2.
bool TranslatorVisitor::LDXR(Imm<2> sz, Reg Rn, Reg Rt) {
|
bool TranslatorVisitor::LDXR(Imm<2> sz, Reg Rn, Reg Rt) {
|
||||||

const bool pair = false;
|
||||||
const size_t size = sz.ZeroExtend<size_t>();
|
const size_t size = sz.ZeroExtend<size_t>();
|
||||||
const bool L = 1;
|
const bool L = 1;
|
||||||
const bool o0 = 0;
|
const bool o0 = 0;
|
||||||
return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt);
|
return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, {}, Rn, Rt);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator entry point for LDAXRB / LDAXRH / LDAXR.
// Same as LDXR except o0 = 1, which ExclusiveSharedDecodeAndOperation maps to
// AccType::ORDERED. Non-pair load (pair = false, L = 1); Rs is passed empty.
// Both sides of the diff are shown below: the earlier call has no pair/Rt2
// arguments, the later call passes pair = false and an empty Rt2.
bool TranslatorVisitor::LDAXR(Imm<2> sz, Reg Rn, Reg Rt) {
|
bool TranslatorVisitor::LDAXR(Imm<2> sz, Reg Rn, Reg Rt) {
|
||||||

const bool pair = false;
|
||||||
const size_t size = sz.ZeroExtend<size_t>();
|
const size_t size = sz.ZeroExtend<size_t>();
|
||||||
const bool L = 1;
|
const bool L = 1;
|
||||||
const bool o0 = 1;
|
const bool o0 = 1;
|
||||||
return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt);
|
return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, {}, Rn, Rt);
|
||||||

}
|
||||||
|
|
||||||
|
// Translator entry point for LDXP (load exclusive pair).
// Forwards to ExclusiveSharedDecodeAndOperation with pair = true, L = 1
// (load) and o0 = 0 (AccType::ATOMIC in that helper). The helper writes the
// loaded data to Rt and Rt2, and reports the instruction as unpredictable
// when Rt equals Rt2. Rs is passed empty because loads produce no status.
bool TranslatorVisitor::LDXP(Imm<1> sz, Reg Rt2, Reg Rn, Reg Rt) {
|
||||||

const bool pair = true;
|
||||||

// NOTE(review): presumably yields size 2 or 3 (constant 1 bit above `sz`),
// matching the leading "1z" of the decoder pattern - confirm against
// concatenate().
const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend<size_t>();
|
||||||

const bool L = 1;
|
||||||

const bool o0 = 0;
|
||||||

return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, Rt2, Rn, Rt);
|
||||||

}
|
||||||
|
|
||||||
|
// Translator entry point for LDAXP (load exclusive pair, ordered variant).
// Same as LDXP except o0 = 1, which ExclusiveSharedDecodeAndOperation maps to
// AccType::ORDERED. pair = true, L = 1 (load); Rs is passed empty.
bool TranslatorVisitor::LDAXP(Imm<1> sz, Reg Rt2, Reg Rn, Reg Rt) {
|
||||||

const bool pair = true;
|
||||||

// NOTE(review): presumably yields size 2 or 3 (constant 1 bit above `sz`),
// matching the leading "1z" of the decoder pattern - confirm against
// concatenate().
const size_t size = concatenate(Imm<1>{1}, sz).ZeroExtend<size_t>();
|
||||||

const bool L = 1;
|
||||||

const bool o0 = 1;
|
||||||

return ExclusiveSharedDecodeAndOperation(*this, ir, pair, size, L, o0, {}, Rt2, Rn, Rt);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& tv, size_t size, bool L, bool o0, Reg Rn, Reg Rt) {
|
static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& tv, size_t size, bool L, bool o0, Reg Rn, Reg Rt) {
|
||||||
|
|
|
@ -38,6 +38,10 @@ U64 IREmitter::Pack2x32To1x64(const U32& lo, const U32& hi) {
|
||||||
return Inst<U64>(Opcode::Pack2x32To1x64, lo, hi);
|
return Inst<U64>(Opcode::Pack2x32To1x64, lo, hi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a Pack2x64To1x128 IR instruction with operands `lo` and `hi` (both
// U64) and returns its U128 result.
U128 IREmitter::Pack2x64To1x128(const U64& lo, const U64& hi) {
|
||||||

return Inst<U128>(Opcode::Pack2x64To1x128, lo, hi);
|
||||||

}
|
||||||
|
|
||||||
// Emits a LeastSignificantWord IR instruction on the U64 `value` and returns
// its U32 result. Both sides of the diff are shown and are identical.
U32 IREmitter::LeastSignificantWord(const U64& value) {
|
U32 IREmitter::LeastSignificantWord(const U64& value) {
|
||||||
return Inst<U32>(Opcode::LeastSignificantWord, value);
|
return Inst<U32>(Opcode::LeastSignificantWord, value);
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,6 +68,7 @@ public:
|
||||||
void PushRSB(const LocationDescriptor& return_location);
|
void PushRSB(const LocationDescriptor& return_location);
|
||||||
|
|
||||||
U64 Pack2x32To1x64(const U32& lo, const U32& hi);
|
U64 Pack2x32To1x64(const U32& lo, const U32& hi);
|
||||||
|
U128 Pack2x64To1x128(const U64& lo, const U64& hi);
|
||||||
U32 LeastSignificantWord(const U64& value);
|
U32 LeastSignificantWord(const U64& value);
|
||||||
ResultAndCarry<U32> MostSignificantWord(const U64& value);
|
ResultAndCarry<U32> MostSignificantWord(const U64& value);
|
||||||
U16 LeastSignificantHalf(U32U64 value);
|
U16 LeastSignificantHalf(U32U64 value);
|
||||||
|
|
|
@ -77,6 +77,7 @@ OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32
|
||||||
|
|
||||||
// Calculations
|
// Calculations
|
||||||
OPCODE(Pack2x32To1x64, T::U64, T::U32, T::U32 )
|
OPCODE(Pack2x32To1x64, T::U64, T::U32, T::U32 )
|
||||||
|
OPCODE(Pack2x64To1x128, T::U128, T::U64, T::U64 )
|
||||||
OPCODE(LeastSignificantWord, T::U32, T::U64 )
|
OPCODE(LeastSignificantWord, T::U32, T::U64 )
|
||||||
OPCODE(MostSignificantWord, T::U32, T::U64 )
|
OPCODE(MostSignificantWord, T::U32, T::U64 )
|
||||||
OPCODE(LeastSignificantHalf, T::U16, T::U32 )
|
OPCODE(LeastSignificantHalf, T::U16, T::U32 )
|
||||||
|
|
Loading…
Reference in a new issue