IR: Add VectorTable and VectorTableLookup IR instructions

This commit is contained in:
MerryMage 2018-08-18 21:08:34 +01:00
parent 0288974512
commit 89d08c7d61
12 changed files with 133 additions and 39 deletions

View file

@ -2696,6 +2696,56 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq); EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
} }
// Emitter for the VectorTable IR instruction. No host code is generated here:
// the table's operands are read directly by EmitVectorTableLookup, which
// fetches this instruction's arguments itself when it emits the lookup.
void EmitX64::EmitVectorTable(EmitContext&, IR::Inst* inst) {
// Do nothing. We *want* to hold on to the refcount for our arguments, so that VectorTableLookup can use them.
// A table is expected to feed exactly one consumer; any other use count is rejected.
ASSERT_MSG(inst->UseCount() == 1, "Table cannot be used multiple times");
}
// Emits a byte-wise table lookup through a host-function fallback.
//
// Operands of `inst`: arg 0 = default bytes, arg 1 = the VectorTable
// instruction, arg 2 = per-byte indices.
//
// The table vectors, the defaults and the indices are written to a temporary
// stack area, a host callback performs the lookup in place over the defaults
// slot, and that slot is then reloaded as the result. Lanes whose index
// selects a vector beyond the table keep their default byte.
void EmitX64::EmitVectorTableLookup(EmitContext& ctx, IR::Inst* inst) {
    ASSERT(inst->GetArg(1).GetInst()->GetOpcode() == IR::Opcode::VectorTable);

    // Host-side lookup routine. `result` arrives pre-filled with the defaults,
    // so only lanes with an in-range index are overwritten.
    using LookupFn = void(*)(const VectorArray<u8>*, VectorArray<u8>&, const VectorArray<u8>&, size_t);
    const LookupFn lookup_fallback = [](const VectorArray<u8>* table, VectorArray<u8>& result, const VectorArray<u8>& indices, size_t table_size) {
        const size_t bytes_per_vector = table[0].size();
        for (size_t lane = 0; lane < result.size(); ++lane) {
            const size_t selector = indices[lane];
            const size_t vector_index = selector / bytes_per_vector;
            if (vector_index >= table_size) {
                continue;
            }
            result[lane] = table[vector_index][selector % bytes_per_vector];
        }
    };

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst());

    // Only the non-void operands of the table instruction are real vectors.
    const size_t table_size = static_cast<size_t>(
        std::count_if(table.begin(), table.end(), [](const auto& entry) { return !entry.IsVoid(); }));

    // Stack layout above the shadow space, in 16-byte slots:
    //   [0, table_size)  table vectors
    //   table_size       defaults (doubles as the result buffer)
    //   table_size + 1   indices
    const u32 stack_space = static_cast<u32>((table_size + 2) * 16);
    const size_t table_offset = ABI_SHADOW_SPACE;
    const size_t defaults_offset = ABI_SHADOW_SPACE + (table_size + 0) * 16;
    const size_t indices_offset = ABI_SHADOW_SPACE + (table_size + 1) * 16;

    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);

    for (size_t slot = 0; slot < table_size; ++slot) {
        const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[slot]);
        code.movaps(xword[rsp + table_offset + slot * 16], table_value);
    }

    const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]);
    const Xbyak::Xmm indices = ctx.reg_alloc.UseXmm(args[2]);
    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();

    ctx.reg_alloc.EndOfAllocScope();
    ctx.reg_alloc.HostCall(nullptr);

    code.movaps(xword[rsp + defaults_offset], defaults);
    code.movaps(xword[rsp + indices_offset], indices);

    // Callback arguments: table base, defaults/result slot, indices slot, vector count.
    code.lea(code.ABI_PARAM1, ptr[rsp + table_offset]);
    code.lea(code.ABI_PARAM2, ptr[rsp + defaults_offset]);
    code.lea(code.ABI_PARAM3, ptr[rsp + indices_offset]);
    code.mov(code.ABI_PARAM4.cvt32(), table_size);
    code.CallFunction(lookup_fallback);

    // The callback wrote its output over the defaults slot.
    code.movaps(result, xword[rsp + defaults_offset]);
    code.add(rsp, stack_space + ABI_SHADOW_SPACE);

    ctx.reg_alloc.DefineValue(inst, result);
}
static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -41,37 +41,47 @@ static bool CanExchange(HostLoc a, HostLoc b) {
// Minimum number of bits required to represent a type // Minimum number of bits required to represent a type
static size_t GetBitWidth(IR::Type type) { static size_t GetBitWidth(IR::Type type) {
switch (type) { switch (type) {
case IR::Type::A32Reg: case IR::Type::A32Reg:
case IR::Type::A32ExtReg: case IR::Type::A32ExtReg:
case IR::Type::A64Reg: case IR::Type::A64Reg:
case IR::Type::A64Vec: case IR::Type::A64Vec:
case IR::Type::CoprocInfo: case IR::Type::CoprocInfo:
case IR::Type::Cond: case IR::Type::Cond:
case IR::Type::Void: case IR::Type::Void:
ASSERT_MSG(false, "Type {} cannot be represented at runtime", type); case IR::Type::Table:
return 0; ASSERT_MSG(false, "Type {} cannot be represented at runtime", type);
case IR::Type::Opaque: return 0;
ASSERT_MSG(false, "Not a concrete type"); case IR::Type::Opaque:
return 0; ASSERT_MSG(false, "Not a concrete type");
case IR::Type::U1: return 0;
return 8; case IR::Type::U1:
case IR::Type::U8: return 8;
return 8; case IR::Type::U8:
case IR::Type::U16: return 8;
return 16; case IR::Type::U16:
case IR::Type::U32: return 16;
return 32; case IR::Type::U32:
case IR::Type::U64: return 32;
return 64; case IR::Type::U64:
case IR::Type::U128: return 64;
return 128; case IR::Type::U128:
case IR::Type::NZCVFlags: return 128;
return 32; // TODO: Update to 16 when flags optimization is done case IR::Type::NZCVFlags:
return 32; // TODO: Update to 16 when flags optimization is done
} }
UNREACHABLE(); UNREACHABLE();
return 0; return 0;
} }
// Reports whether `type` is one the register allocator treats as carrying no
// value of its own. Currently that is only IR::Type::Table.
static bool IsValuelessType(IR::Type type) {
    return type == IR::Type::Table;
}
bool HostLocInfo::IsLocked() const { bool HostLocInfo::IsLocked() const {
return is_being_used; return is_being_used;
} }
@ -139,6 +149,10 @@ bool Argument::IsImmediate() const {
return value.IsImmediate(); return value.IsImmediate();
} }
bool Argument::IsVoid() const {
return GetType() == IR::Type::Void;
}
bool Argument::FitsInImmediateU32() const { bool Argument::FitsInImmediateU32() const {
if (!IsImmediate()) if (!IsImmediate())
return false; return false;
@ -209,11 +223,11 @@ bool Argument::IsInMemory() const {
} }
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) { RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = { Argument{*this}, Argument{*this}, Argument{*this} }; ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
for (size_t i = 0; i < inst->NumArgs(); i++) { for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value& arg = inst->GetArg(i); const IR::Value& arg = inst->GetArg(i);
ret[i].value = arg; ret[i].value = arg;
if (!arg.IsImmediate()) { if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined"); ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference(); LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
} }

View file

@ -61,6 +61,7 @@ struct Argument {
public: public:
IR::Type GetType() const; IR::Type GetType() const;
bool IsImmediate() const; bool IsImmediate() const;
bool IsVoid() const;
bool FitsInImmediateU32() const; bool FitsInImmediateU32() const;
bool FitsInImmediateS32() const; bool FitsInImmediateS32() const;
@ -82,7 +83,7 @@ public:
private: private:
friend class RegAlloc; friend class RegAlloc;
Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
bool allocated = false; bool allocated = false;
RegAlloc& reg_alloc; RegAlloc& reg_alloc;
@ -91,7 +92,7 @@ private:
class RegAlloc final { class RegAlloc final {
public: public:
using ArgumentInfo = std::array<Argument, 3>; using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr) explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {} : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}

View file

@ -1537,6 +1537,16 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
return {}; return {};
} }
// Builds a Table value from between one and four 128-bit vectors.
// The VectorTable opcode is always emitted with four operands, so the list is
// padded with default-constructed U128 values when fewer are supplied.
Table IREmitter::VectorTable(std::vector<U128> values) {
    ASSERT(values.size() >= 1 && values.size() <= 4);

    constexpr size_t operand_slots = 4;
    values.resize(operand_slots);

    return Inst<Table>(Opcode::VectorTable,
                       values[0],
                       values[1],
                       values[2],
                       values[3]);
}
// Emits a VectorTableLookup instruction with operands, in order: the default
// vector, the table, and the index vector.
U128 IREmitter::VectorTableLookup(const U128& defaults, const Table& table, const U128& indices) {
    const U128 looked_up = Inst<U128>(Opcode::VectorTableLookup, defaults, table, indices);
    return looked_up;
}
U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) { U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
switch (esize) { switch (esize) {
case 8: case 8:

View file

@ -266,6 +266,8 @@ public:
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
U128 VectorSub(size_t esize, const U128& a, const U128& b); U128 VectorSub(size_t esize, const U128& a, const U128& b);
Table VectorTable(std::vector<U128> values);
U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b); U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a); U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
U128 VectorZeroExtend(size_t original_esize, const U128& a); U128 VectorZeroExtend(size_t original_esize, const U128& a);

View file

@ -489,7 +489,7 @@ size_t Inst::NumArgs() const {
Value Inst::GetArg(size_t index) const { Value Inst::GetArg(size_t index) const {
ASSERT_MSG(index < GetNumArgsOf(op), "Inst::GetArg: index {} >= number of arguments of {} ({})", index, op, GetNumArgsOf(op)); ASSERT_MSG(index < GetNumArgsOf(op), "Inst::GetArg: index {} >= number of arguments of {} ({})", index, op, GetNumArgsOf(op));
ASSERT_MSG(!args[index].IsEmpty(), "Inst::GetArg: index {} is empty", index); ASSERT_MSG(!args[index].IsEmpty() || GetArgTypeOf(op, index) == IR::Type::Opaque, "Inst::GetArg: index {} is empty", index, args[index].GetType());
return args[index]; return args[index];
} }

View file

@ -17,6 +17,8 @@ namespace Dynarmic::IR {
enum class Opcode; enum class Opcode;
enum class Type; enum class Type;
constexpr size_t max_arg_count = 4;
/** /**
* A representation of a microinstruction. A single ARM/Thumb instruction may be * A representation of a microinstruction. A single ARM/Thumb instruction may be
* converted into zero or more microinstructions. * converted into zero or more microinstructions.
@ -136,7 +138,7 @@ private:
Opcode op; Opcode op;
size_t use_count = 0; size_t use_count = 0;
std::array<Value, 3> args; std::array<Value, max_arg_count> args;
// Pointers to related pseudooperations: // Pointers to related pseudooperations:
// Since not all combinations are possible, we use a union to save space // Since not all combinations are possible, we use a union to save space

View file

@ -43,6 +43,7 @@ constexpr Type U128 = Type::U128;
constexpr Type CoprocInfo = Type::CoprocInfo; constexpr Type CoprocInfo = Type::CoprocInfo;
constexpr Type NZCV = Type::NZCVFlags; constexpr Type NZCV = Type::NZCVFlags;
constexpr Type Cond = Type::Cond; constexpr Type Cond = Type::Cond;
constexpr Type Table = Type::Table;
static const std::map<Opcode, Meta> opcode_info {{ static const std::map<Opcode, Meta> opcode_info {{
#define OPCODE(name, type, ...) { Opcode::name, { #name, type, { __VA_ARGS__ } } }, #define OPCODE(name, type, ...) { Opcode::name, { #name, type, { __VA_ARGS__ } } },

View file

@ -401,6 +401,8 @@ OPCODE(VectorSub8, U128, U128,
OPCODE(VectorSub16, U128, U128, U128 ) OPCODE(VectorSub16, U128, U128, U128 )
OPCODE(VectorSub32, U128, U128, U128 ) OPCODE(VectorSub32, U128, U128, U128 )
OPCODE(VectorSub64, U128, U128, U128 ) OPCODE(VectorSub64, U128, U128, U128 )
OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque )
OPCODE(VectorTableLookup, U128, U128, Table, U128 )
OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 ) OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 )
OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 ) OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 )
OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 ) OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )

View file

@ -16,13 +16,23 @@
namespace Dynarmic::IR { namespace Dynarmic::IR {
std::string GetNameOf(Type type) { std::string GetNameOf(Type type) {
static const std::array<const char*, 16> names = { static const std::array<const char*, 15> names{"A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "U128", "CoprocInfo", "NZCVFlags", "Cond", "Table"};
"Void", "A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "F32", "F64", "CoprocInfo", "NZCVFlags", "Cond"
}; const size_t bits = static_cast<size_t>(type);
const size_t index = static_cast<size_t>(type); if (bits == 0) {
if (index > names.size()) return "Void";
return fmt::format("Unknown Type {}", index); }
return names.at(index);
std::string result;
for (size_t i = 0; i < names.size(); i++) {
if ((bits & (size_t(1) << i)) != 0) {
if (!result.empty()) {
result += '|';
}
result += names[i];
}
}
return result;
} }
bool AreTypesCompatible(Type t1, Type t2) { bool AreTypesCompatible(Type t1, Type t2) {

View file

@ -32,6 +32,7 @@ enum class Type {
CoprocInfo = 1 << 11, CoprocInfo = 1 << 11,
NZCVFlags = 1 << 12, NZCVFlags = 1 << 12,
Cond = 1 << 13, Cond = 1 << 13,
Table = 1 << 14,
}; };
constexpr Type operator|(Type a, Type b) { constexpr Type operator|(Type a, Type b) {

View file

@ -103,5 +103,6 @@ using U32U64 = TypedValue<Type::U32 | Type::U64>;
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>; using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
using NZCV = TypedValue<Type::NZCVFlags>; using NZCV = TypedValue<Type::NZCVFlags>;
using Table = TypedValue<Type::Table>;
} // namespace Dynarmic::IR } // namespace Dynarmic::IR