IR: Add VectorTable and VectorTableLookup IR instructions
This commit is contained in:
parent
0288974512
commit
89d08c7d61
12 changed files with 133 additions and 39 deletions
|
@ -2696,6 +2696,56 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
|
|||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
|
||||
}
|
||||
|
||||
// Deliberately emits no host code for a VectorTable instruction.
//
// The references held on this instruction's arguments are intentionally left
// in place so that the VectorTableLookup consuming the table can use those
// arguments itself. Because of that, a table must have exactly one use.
void EmitX64::EmitVectorTable(EmitContext&, IR::Inst* inst) {
    ASSERT_MSG(inst->UseCount() == 1, "Table cannot be used multiple times");
}
|
||||
|
||||
// Emits a vector table lookup by spilling its operands to the host stack and
// calling a C++ fallback routine that performs the byte-wise lookup.
//
// Stack layout above the ABI shadow space, in 16-byte slots:
//   slots [0, table_size) : the table vectors
//   slot  table_size      : the defaults on entry, the lookup result on return
//   slot  table_size + 1  : the indices
//
// Argument 1 of `inst` must be a VectorTable instruction; its arguments are
// read directly from here.
void EmitX64::EmitVectorTableLookup(EmitContext& ctx, IR::Inst* inst) {
    auto* const table_inst = inst->GetArg(1).GetInst();
    ASSERT(table_inst->GetOpcode() == IR::Opcode::VectorTable);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto table = ctx.reg_alloc.GetArgumentInfo(table_inst);

    // Count the table operands that are not void. The store loop below assumes
    // those are the leading operands of the table instruction.
    const auto is_present = [](const auto& entry) { return !entry.IsVoid(); };
    const size_t table_size = std::count_if(table.begin(), table.end(), is_present);

    constexpr size_t slot_size = 16;
    const size_t defaults_offset = ABI_SHADOW_SPACE + table_size * slot_size;
    const size_t indices_offset = ABI_SHADOW_SPACE + (table_size + 1) * slot_size;
    const u32 stack_space = static_cast<u32>((table_size + 2) * slot_size);

    // Reserve the stack frame before anything is stored into it.
    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);

    for (size_t slot = 0; slot < table_size; ++slot) {
        const Xbyak::Xmm entry = ctx.reg_alloc.UseXmm(table[slot]);
        code.movaps(xword[rsp + ABI_SHADOW_SPACE + slot * slot_size], entry);
    }

    const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Xmm indices = ctx.reg_alloc.UseXmm(args[2]);
    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    ctx.reg_alloc.EndOfAllocScope();
    ctx.reg_alloc.HostCall(nullptr);

    // Spill the remaining operands and pass pointers to the three regions.
    code.movaps(xword[rsp + defaults_offset], defaults);
    code.movaps(xword[rsp + indices_offset], indices);
    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
    code.lea(code.ABI_PARAM2, ptr[rsp + defaults_offset]);
    code.lea(code.ABI_PARAM3, ptr[rsp + indices_offset]);
    code.mov(code.ABI_PARAM4.cvt32(), table_size);

    using LookupFn = void(*)(const VectorArray<u8>*, VectorArray<u8>&, const VectorArray<u8>&, size_t);
    code.CallFunction(static_cast<LookupFn>(
        [](const VectorArray<u8>* vectors, VectorArray<u8>& out, const VectorArray<u8>& lookup, size_t vector_count) {
            const size_t bytes_per_vector = vectors[0].size();
            for (size_t i = 0; i < out.size(); ++i) {
                const size_t which_vector = lookup[i] / bytes_per_vector;
                if (which_vector >= vector_count) {
                    // Index falls outside the table: the default byte already in `out` is kept.
                    continue;
                }
                out[i] = vectors[which_vector][lookup[i] % bytes_per_vector];
            }
        }
    ));

    // The fallback wrote its result over the defaults slot.
    code.movaps(result, xword[rsp + defaults_offset]);
    code.add(rsp, stack_space + ABI_SHADOW_SPACE);

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||
|
||||
static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
|
|
|
@ -41,37 +41,47 @@ static bool CanExchange(HostLoc a, HostLoc b) {
|
|||
// Minimum number of bits required to represent a type
|
||||
static size_t GetBitWidth(IR::Type type) {
    switch (type) {
    // These types cannot be represented at runtime, so asking for their
    // width is a programming error.
    case IR::Type::A32Reg:
    case IR::Type::A32ExtReg:
    case IR::Type::A64Reg:
    case IR::Type::A64Vec:
    case IR::Type::CoprocInfo:
    case IR::Type::Cond:
    case IR::Type::Void:
    case IR::Type::Table:
        ASSERT_MSG(false, "Type {} cannot be represented at runtime", type);
        return 0;
    // Opaque is a placeholder rather than a concrete type.
    case IR::Type::Opaque:
        ASSERT_MSG(false, "Not a concrete type");
        return 0;
    // A single-bit value is reported with the same width as U8.
    case IR::Type::U1:
        return 8;
    case IR::Type::U8:
        return 8;
    case IR::Type::U16:
        return 16;
    case IR::Type::U32:
        return 32;
    case IR::Type::U64:
        return 64;
    case IR::Type::U128:
        return 128;
    case IR::Type::NZCVFlags:
        return 32; // TODO: Update to 16 when flags optimization is done
    }
    // Every enumerator is handled above; reaching this point means `type`
    // held a value that is not a single enumerator.
    UNREACHABLE();
    return 0;
}
|
||||
|
||||
// Tells whether `type` is treated as valueless. IR::Type::Table is the only
// such type; every other type yields false.
static bool IsValuelessType(IR::Type type) {
    return type == IR::Type::Table;
}
|
||||
|
||||
bool HostLocInfo::IsLocked() const {
|
||||
return is_being_used;
|
||||
}
|
||||
|
@ -139,6 +149,10 @@ bool Argument::IsImmediate() const {
|
|||
return value.IsImmediate();
|
||||
}
|
||||
|
||||
bool Argument::IsVoid() const {
|
||||
return GetType() == IR::Type::Void;
|
||||
}
|
||||
|
||||
bool Argument::FitsInImmediateU32() const {
|
||||
if (!IsImmediate())
|
||||
return false;
|
||||
|
@ -209,11 +223,11 @@ bool Argument::IsInMemory() const {
|
|||
}
|
||||
|
||||
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
|
||||
ArgumentInfo ret = { Argument{*this}, Argument{*this}, Argument{*this} };
|
||||
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
|
||||
for (size_t i = 0; i < inst->NumArgs(); i++) {
|
||||
const IR::Value& arg = inst->GetArg(i);
|
||||
ret[i].value = arg;
|
||||
if (!arg.IsImmediate()) {
|
||||
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
|
||||
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
|
||||
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
|
||||
}
|
||||
|
|
|
@ -61,6 +61,7 @@ struct Argument {
|
|||
public:
|
||||
IR::Type GetType() const;
|
||||
bool IsImmediate() const;
|
||||
bool IsVoid() const;
|
||||
|
||||
bool FitsInImmediateU32() const;
|
||||
bool FitsInImmediateS32() const;
|
||||
|
@ -82,7 +83,7 @@ public:
|
|||
|
||||
private:
|
||||
friend class RegAlloc;
|
||||
Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
|
||||
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
|
||||
|
||||
bool allocated = false;
|
||||
RegAlloc& reg_alloc;
|
||||
|
@ -91,7 +92,7 @@ private:
|
|||
|
||||
class RegAlloc final {
|
||||
public:
|
||||
using ArgumentInfo = std::array<Argument, 3>;
|
||||
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||
|
||||
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
|
||||
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
|
||||
|
|
|
@ -1537,6 +1537,16 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
|||
return {};
|
||||
}
|
||||
|
||||
// Builds a VectorTable instruction from one to four vectors.
//
// The instruction always takes four operands, so a shorter list is padded
// with default-constructed U128 values before the instruction is created.
Table IREmitter::VectorTable(std::vector<U128> values) {
    ASSERT(!values.empty() && values.size() <= 4);

    values.resize(4);
    const U128& first = values[0];
    const U128& second = values[1];
    const U128& third = values[2];
    const U128& fourth = values[3];
    return Inst<Table>(Opcode::VectorTable, first, second, third, fourth);
}
|
||||
|
||||
// Creates a VectorTableLookup instruction whose operands are, in order,
// `defaults`, `table` and `indices`, and returns its U128 result.
U128 IREmitter::VectorTableLookup(const U128& defaults, const Table& table, const U128& indices) {
    const U128 lookup_result = Inst<U128>(Opcode::VectorTableLookup, defaults, table, indices);
    return lookup_result;
}
|
||||
|
||||
U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
|
||||
switch (esize) {
|
||||
case 8:
|
||||
|
|
|
@ -266,6 +266,8 @@ public:
|
|||
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
||||
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||
Table VectorTable(std::vector<U128> values);
|
||||
U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
|
||||
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
|
||||
U128 VectorZeroExtend(size_t original_esize, const U128& a);
|
||||
|
|
|
@ -489,7 +489,7 @@ size_t Inst::NumArgs() const {
|
|||
|
||||
// Returns the argument stored at `index`.
//
// Asserts that `index` is within the argument count of this instruction's
// opcode, and that the slot is populated. An empty slot is tolerated only
// where the opcode declares that argument as Opaque.
Value Inst::GetArg(size_t index) const {
    ASSERT_MSG(index < GetNumArgsOf(op), "Inst::GetArg: index {} >= number of arguments of {} ({})", index, op, GetNumArgsOf(op));
    ASSERT_MSG(!args[index].IsEmpty() || GetArgTypeOf(op, index) == IR::Type::Opaque, "Inst::GetArg: index {} is empty", index, args[index].GetType());

    return args[index];
}
|
||||
|
|
|
@ -17,6 +17,8 @@ namespace Dynarmic::IR {
|
|||
enum class Opcode;
|
||||
enum class Type;
|
||||
|
||||
// Upper bound on the number of arguments an instruction can carry; used to size per-instruction argument arrays.
constexpr size_t max_arg_count = 4;
|
||||
|
||||
/**
|
||||
* A representation of a microinstruction. A single ARM/Thumb instruction may be
|
||||
* converted into zero or more microinstructions.
|
||||
|
@ -136,7 +138,7 @@ private:
|
|||
|
||||
Opcode op;
|
||||
size_t use_count = 0;
|
||||
std::array<Value, 3> args;
|
||||
std::array<Value, max_arg_count> args;
|
||||
|
||||
// Pointers to related pseudooperations:
|
||||
// Since not all combinations are possible, we use a union to save space
|
||||
|
|
|
@ -43,6 +43,7 @@ constexpr Type U128 = Type::U128;
|
|||
constexpr Type CoprocInfo = Type::CoprocInfo;
|
||||
constexpr Type NZCV = Type::NZCVFlags;
|
||||
constexpr Type Cond = Type::Cond;
|
||||
constexpr Type Table = Type::Table;
|
||||
|
||||
static const std::map<Opcode, Meta> opcode_info {{
|
||||
#define OPCODE(name, type, ...) { Opcode::name, { #name, type, { __VA_ARGS__ } } },
|
||||
|
|
|
@ -401,6 +401,8 @@ OPCODE(VectorSub8, U128, U128,
|
|||
OPCODE(VectorSub16, U128, U128, U128 )
|
||||
OPCODE(VectorSub32, U128, U128, U128 )
|
||||
OPCODE(VectorSub64, U128, U128, U128 )
|
||||
OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
|
||||
OPCODE(VectorTableLookup, U128, U128, Table, U128 )
|
||||
OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
|
||||
OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
|
||||
OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
|
||||
|
|
|
@ -16,13 +16,23 @@
|
|||
namespace Dynarmic::IR {
|
||||
|
||||
// Produces a human-readable name for `type`.
//
// Type is a bit set: a value of zero is "Void", and otherwise the names of all
// set bits are joined with '|' in ascending bit order (for example "U32|U64").
// names[i] is the name of bit i. Bits above the last named one are ignored.
std::string GetNameOf(Type type) {
    static const std::array<const char*, 15> names{"A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "U128", "CoprocInfo", "NZCVFlags", "Cond", "Table"};

    const size_t bits = static_cast<size_t>(type);
    if (bits == 0) {
        return "Void";
    }

    std::string result;
    for (size_t i = 0; i < names.size(); i++) {
        if ((bits & (size_t(1) << i)) != 0) {
            if (!result.empty()) {
                result += '|';
            }
            result += names[i];
        }
    }
    return result;
}
|
||||
|
||||
bool AreTypesCompatible(Type t1, Type t2) {
|
||||
|
|
|
@ -32,6 +32,7 @@ enum class Type {
|
|||
CoprocInfo = 1 << 11,
|
||||
NZCVFlags = 1 << 12,
|
||||
Cond = 1 << 13,
|
||||
Table = 1 << 14,
|
||||
};
|
||||
|
||||
constexpr Type operator|(Type a, Type b) {
|
||||
|
|
|
@ -103,5 +103,6 @@ using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
|||
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
|
||||
using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
|
||||
using NZCV = TypedValue<Type::NZCVFlags>;
|
||||
using Table = TypedValue<Type::Table>;
|
||||
|
||||
} // namespace Dynarmic::IR
|
||||
|
|
Loading…
Reference in a new issue