IR: Add VectorTable and VectorTableLookup IR instructions
This commit is contained in:
parent
0288974512
commit
89d08c7d61
12 changed files with 133 additions and 39 deletions
|
@ -2696,6 +2696,56 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emitter for the IR VectorTable instruction. Generates no host code.
void EmitX64::EmitVectorTable(EmitContext&, IR::Inst* inst) {
    // Intentionally emit nothing and keep the refcounts of our arguments alive:
    // the VectorTableLookup that consumes this table reads those arguments itself.
    // A table is expected to have exactly one consumer.
    ASSERT_MSG(inst->UseCount() == 1, "Table cannot be used multiple times");
}
|
||||||
|
|
||||||
|
// Emits a byte-wise table lookup by spilling the table vectors, the defaults and
// the indices to the host stack and calling a C++ fallback routine on them.
//
// Argument 0 is the defaults vector, argument 1 must be the result of a
// VectorTable instruction, argument 2 is the vector of byte indices.
void EmitX64::EmitVectorTableLookup(EmitContext& ctx, IR::Inst* inst) {
    ASSERT(inst->GetArg(1).GetInst()->GetOpcode() == IR::Opcode::VectorTable);

    auto lookup_args = ctx.reg_alloc.GetArgumentInfo(inst);
    // The table's own arguments are fetched straight from the VectorTable instruction.
    auto table_args = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());

    // Only the non-void table arguments are actual table vectors.
    const size_t table_size = std::count_if(table_args.begin(), table_args.end(),
                                            [](const auto& entry) { return !entry.IsVoid(); });

    // Stack frame above the ABI shadow space, one 16-byte slot per vector:
    //   slots [0, table_size)   table vectors
    //   slot  table_size        defaults (the fallback writes the result here)
    //   slot  table_size + 1    indices
    constexpr size_t slot_bytes = 16;
    const size_t defaults_slot = table_size;
    const size_t indices_slot = table_size + 1;
    const u32 stack_space = static_cast<u32>((table_size + 2) * slot_bytes);

    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
    for (size_t slot = 0; slot < table_size; ++slot) {
        const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table_args[slot]);
        code.movaps(xword[rsp + ABI_SHADOW_SPACE + slot * slot_bytes], table_value);
    }

    const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(lookup_args[0]);
    const Xbyak::Xmm indices = ctx.reg_alloc.UseXmm(lookup_args[2]);
    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    ctx.reg_alloc.EndOfAllocScope();
    // NOTE(review): presumably prepares the register state for the host call below;
    // confirm against RegAlloc::HostCall.
    ctx.reg_alloc.HostCall(nullptr);

    code.movaps(xword[rsp + ABI_SHADOW_SPACE + defaults_slot * slot_bytes], defaults);
    code.movaps(xword[rsp + ABI_SHADOW_SPACE + indices_slot * slot_bytes], indices);
    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + defaults_slot * slot_bytes]);
    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + indices_slot * slot_bytes]);
    code.mov(code.ABI_PARAM4.cvt32(), table_size);

    using LookupFn = void (*)(const VectorArray<u8>*, VectorArray<u8>&, const VectorArray<u8>&, size_t);
    code.CallFunction(static_cast<LookupFn>(
        [](const VectorArray<u8>* vectors, VectorArray<u8>& out, const VectorArray<u8>& selectors, size_t vector_count) {
            // `out` starts as the defaults vector; only lanes with an in-range selector are overwritten.
            const size_t bytes_per_vector = vectors[0].size();
            for (size_t lane = 0; lane < out.size(); ++lane) {
                const size_t vector_index = selectors[lane] / bytes_per_vector;
                if (vector_index >= vector_count) {
                    continue;
                }
                out[lane] = vectors[vector_index][selectors[lane] % bytes_per_vector];
            }
        }));

    // The fallback updated the defaults slot in place; that slot now holds the result.
    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + defaults_slot * slot_bytes]);
    code.add(rsp, stack_space + ABI_SHADOW_SPACE);

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||||
|
|
||||||
static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
|
|
@ -41,37 +41,47 @@ static bool CanExchange(HostLoc a, HostLoc b) {
|
||||||
// Minimum number of bits required to represent a type
|
// Minimum number of bits required to represent a type
|
||||||
// Returns the width in bits used to hold a value of the given IR type.
// Types without a runtime representation trip an assertion and yield 0.
static size_t GetBitWidth(IR::Type type) {
    switch (type) {
    // Concrete value types.
    case IR::Type::U1:
    case IR::Type::U8:
        return 8;
    case IR::Type::U16:
        return 16;
    case IR::Type::U32:
        return 32;
    case IR::Type::U64:
        return 64;
    case IR::Type::U128:
        return 128;
    case IR::Type::NZCVFlags:
        return 32; // TODO: Update to 16 when flags optimization is done

    // Not a concrete type.
    case IR::Type::Opaque:
        ASSERT_MSG(false, "Not a concrete type");
        return 0;

    // Types that never exist as runtime values.
    case IR::Type::Void:
    case IR::Type::A32Reg:
    case IR::Type::A32ExtReg:
    case IR::Type::A64Reg:
    case IR::Type::A64Vec:
    case IR::Type::CoprocInfo:
    case IR::Type::Cond:
    case IR::Type::Table:
        ASSERT_MSG(false, "Type {} cannot be represented at runtime", type);
        return 0;
    }
    UNREACHABLE();
    return 0;
}
|
||||||
|
|
||||||
|
// Reports whether values of this IR type are "valueless"; only Type::Table is.
static bool IsValuelessType(IR::Type type) {
    return type == IR::Type::Table;
}
|
||||||
|
|
||||||
bool HostLocInfo::IsLocked() const {
|
bool HostLocInfo::IsLocked() const {
|
||||||
return is_being_used;
|
return is_being_used;
|
||||||
}
|
}
|
||||||
|
@ -139,6 +149,10 @@ bool Argument::IsImmediate() const {
|
||||||
return value.IsImmediate();
|
return value.IsImmediate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Argument::IsVoid() const {
|
||||||
|
return GetType() == IR::Type::Void;
|
||||||
|
}
|
||||||
|
|
||||||
bool Argument::FitsInImmediateU32() const {
|
bool Argument::FitsInImmediateU32() const {
|
||||||
if (!IsImmediate())
|
if (!IsImmediate())
|
||||||
return false;
|
return false;
|
||||||
|
@ -209,11 +223,11 @@ bool Argument::IsInMemory() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
|
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
|
||||||
ArgumentInfo ret = { Argument{*this}, Argument{*this}, Argument{*this} };
|
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
|
||||||
for (size_t i = 0; i < inst->NumArgs(); i++) {
|
for (size_t i = 0; i < inst->NumArgs(); i++) {
|
||||||
const IR::Value& arg = inst->GetArg(i);
|
const IR::Value& arg = inst->GetArg(i);
|
||||||
ret[i].value = arg;
|
ret[i].value = arg;
|
||||||
if (!arg.IsImmediate()) {
|
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
|
||||||
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
|
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
|
||||||
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
|
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,6 +61,7 @@ struct Argument {
|
||||||
public:
|
public:
|
||||||
IR::Type GetType() const;
|
IR::Type GetType() const;
|
||||||
bool IsImmediate() const;
|
bool IsImmediate() const;
|
||||||
|
bool IsVoid() const;
|
||||||
|
|
||||||
bool FitsInImmediateU32() const;
|
bool FitsInImmediateU32() const;
|
||||||
bool FitsInImmediateS32() const;
|
bool FitsInImmediateS32() const;
|
||||||
|
@ -82,7 +83,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class RegAlloc;
|
friend class RegAlloc;
|
||||||
Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
|
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
|
||||||
|
|
||||||
bool allocated = false;
|
bool allocated = false;
|
||||||
RegAlloc& reg_alloc;
|
RegAlloc& reg_alloc;
|
||||||
|
@ -91,7 +92,7 @@ private:
|
||||||
|
|
||||||
class RegAlloc final {
|
class RegAlloc final {
|
||||||
public:
|
public:
|
||||||
using ArgumentInfo = std::array<Argument, 3>;
|
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||||
|
|
||||||
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
|
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
|
||||||
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
|
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
|
||||||
|
|
|
@ -1537,6 +1537,16 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a VectorTable instruction from one to four U128 vectors.
// The instruction always receives four operands; slots beyond the supplied
// vectors are filled with default-constructed U128 values.
Table IREmitter::VectorTable(std::vector<U128> values) {
    ASSERT(values.size() >= 1 && values.size() <= 4);

    constexpr size_t operand_slots = 4;
    values.resize(operand_slots);

    const U128& first = values[0];
    const U128& second = values[1];
    const U128& third = values[2];
    const U128& fourth = values[3];
    return Inst<Table>(Opcode::VectorTable, first, second, third, fourth);
}
|
||||||
|
|
||||||
|
// Emits a VectorTableLookup instruction with operands in the order
// (defaults, table, indices) and returns its U128 result.
U128 IREmitter::VectorTableLookup(const U128& defaults, const Table& table, const U128& indices) {
    const U128 looked_up = Inst<U128>(Opcode::VectorTableLookup, defaults, table, indices);
    return looked_up;
}
|
||||||
|
|
||||||
U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -266,6 +266,8 @@ public:
|
||||||
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
|
Table VectorTable(std::vector<U128> values);
|
||||||
|
U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
|
||||||
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
|
U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
|
||||||
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
||||||
|
|
|
@ -489,7 +489,7 @@ size_t Inst::NumArgs() const {
|
||||||
|
|
||||||
// Returns the argument stored at `index`.
//
// Asserts that `index` is within the argument count of this instruction's opcode.
// An empty argument is only tolerated when the opcode declares that slot as
// IR::Type::Opaque; any other empty slot trips the second assertion.
Value Inst::GetArg(size_t index) const {
    ASSERT_MSG(index < GetNumArgsOf(op), "Inst::GetArg: index {} >= number of arguments of {} ({})", index, op, GetNumArgsOf(op));
    // The format string has a single placeholder, so pass exactly one argument.
    // The previously supplied args[index].GetType() had no matching "{}" and was never printed.
    ASSERT_MSG(!args[index].IsEmpty() || GetArgTypeOf(op, index) == IR::Type::Opaque, "Inst::GetArg: index {} is empty", index);

    return args[index];
}
|
||||||
|
|
|
@ -17,6 +17,8 @@ namespace Dynarmic::IR {
|
||||||
enum class Opcode;
|
enum class Opcode;
|
||||||
enum class Type;
|
enum class Type;
|
||||||
|
|
||||||
|
constexpr size_t max_arg_count = 4;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A representation of a microinstruction. A single ARM/Thumb instruction may be
|
* A representation of a microinstruction. A single ARM/Thumb instruction may be
|
||||||
* converted into zero or more microinstructions.
|
* converted into zero or more microinstructions.
|
||||||
|
@ -136,7 +138,7 @@ private:
|
||||||
|
|
||||||
Opcode op;
|
Opcode op;
|
||||||
size_t use_count = 0;
|
size_t use_count = 0;
|
||||||
std::array<Value, 3> args;
|
std::array<Value, max_arg_count> args;
|
||||||
|
|
||||||
// Pointers to related pseudooperations:
|
// Pointers to related pseudooperations:
|
||||||
// Since not all combinations are possible, we use a union to save space
|
// Since not all combinations are possible, we use a union to save space
|
||||||
|
|
|
@ -43,6 +43,7 @@ constexpr Type U128 = Type::U128;
|
||||||
constexpr Type CoprocInfo = Type::CoprocInfo;
|
constexpr Type CoprocInfo = Type::CoprocInfo;
|
||||||
constexpr Type NZCV = Type::NZCVFlags;
|
constexpr Type NZCV = Type::NZCVFlags;
|
||||||
constexpr Type Cond = Type::Cond;
|
constexpr Type Cond = Type::Cond;
|
||||||
|
constexpr Type Table = Type::Table;
|
||||||
|
|
||||||
static const std::map<Opcode, Meta> opcode_info {{
|
static const std::map<Opcode, Meta> opcode_info {{
|
||||||
#define OPCODE(name, type, ...) { Opcode::name, { #name, type, { __VA_ARGS__ } } },
|
#define OPCODE(name, type, ...) { Opcode::name, { #name, type, { __VA_ARGS__ } } },
|
||||||
|
|
|
@ -401,6 +401,8 @@ OPCODE(VectorSub8, U128, U128,
|
||||||
OPCODE(VectorSub16, U128, U128, U128 )
|
OPCODE(VectorSub16, U128, U128, U128 )
|
||||||
OPCODE(VectorSub32, U128, U128, U128 )
|
OPCODE(VectorSub32, U128, U128, U128 )
|
||||||
OPCODE(VectorSub64, U128, U128, U128 )
|
OPCODE(VectorSub64, U128, U128, U128 )
|
||||||
|
OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
|
||||||
|
OPCODE(VectorTableLookup, U128, U128, Table, U128 )
|
||||||
OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
|
OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
|
||||||
OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
|
OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
|
||||||
OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
|
OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
|
||||||
|
|
|
@ -16,13 +16,23 @@
|
||||||
namespace Dynarmic::IR {
|
namespace Dynarmic::IR {
|
||||||
|
|
||||||
// Produces a readable name for a Type bitmask: "Void" for zero, otherwise the
// names of all set flags (lowest bit first) joined with '|'.
std::string GetNameOf(Type type) {
    // Entry i names the flag with value (1 << i).
    static const std::array<const char*, 15> flag_names{"A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "U128", "CoprocInfo", "NZCVFlags", "Cond", "Table"};

    const auto raw = static_cast<size_t>(type);
    if (raw == 0) {
        return "Void";
    }

    std::string joined;
    size_t mask = 1;
    for (const char* flag_name : flag_names) {
        if ((raw & mask) != 0) {
            if (!joined.empty()) {
                joined += '|';
            }
            joined += flag_name;
        }
        mask <<= 1;
    }
    return joined;
}
|
||||||
|
|
||||||
bool AreTypesCompatible(Type t1, Type t2) {
|
bool AreTypesCompatible(Type t1, Type t2) {
|
||||||
|
|
|
@ -32,6 +32,7 @@ enum class Type {
|
||||||
CoprocInfo = 1 << 11,
|
CoprocInfo = 1 << 11,
|
||||||
NZCVFlags = 1 << 12,
|
NZCVFlags = 1 << 12,
|
||||||
Cond = 1 << 13,
|
Cond = 1 << 13,
|
||||||
|
Table = 1 << 14,
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr Type operator|(Type a, Type b) {
|
constexpr Type operator|(Type a, Type b) {
|
||||||
|
|
|
@ -103,5 +103,6 @@ using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
||||||
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
|
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
|
||||||
using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
|
using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
|
||||||
using NZCV = TypedValue<Type::NZCVFlags>;
|
using NZCV = TypedValue<Type::NZCVFlags>;
|
||||||
|
using Table = TypedValue<Type::Table>;
|
||||||
|
|
||||||
} // namespace Dynarmic::IR
|
} // namespace Dynarmic::IR
|
||||||
|
|
Loading…
Reference in a new issue