IR: Add VectorTable and VectorTableLookup IR instructions

2018-08-18 21:08:34 +01:00 · 2018-08-18 21:08:34 +01:00 · 89d08c7d61
commit 89d08c7d61
parent 0288974512
12 changed files with 133 additions and 39 deletions
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@ -2696,6 +2696,56 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
 }

+void EmitX64::EmitVectorTable(EmitContext&, IR::Inst* inst) { // emits no host code; the context parameter is unused
+    // Do nothing. We *want* to hold on to the refcount for our arguments, so VectorTableLookup can use our arguments.
+    ASSERT_MSG(inst->UseCount() == 1, "Table cannot be used multiple times"); // a table must have exactly one use
+}
+
+void EmitX64::EmitVectorTableLookup(EmitContext& ctx, IR::Inst* inst) {
+    ASSERT(inst->GetArg(1).GetInst()->GetOpcode() == IR::Opcode::VectorTable); // argument 1 must come from a VectorTable instruction
+    // Overview: stage the table vectors, the defaults and the indices on the stack, then let a host-call fallback perform the byte lookup.
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst()); // operands of the VectorTable instruction itself
+    // Number of non-Void table operands; the loop below treats operands [0, table_size) as the table vectors.
+    const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); });
+    // Stack layout from rsp + ABI_SHADOW_SPACE, 16 bytes per slot: table_size table vectors, then defaults, then indices.
+    const u32 stack_space = static_cast<u32>((table_size + 2) * 16);
+    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
+    for (size_t i = 0; i < table_size; ++i) {
+        const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]);
+        code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value);
+    }
+    const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]);
+    const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    ctx.reg_alloc.EndOfAllocScope();
+    ctx.reg_alloc.HostCall(nullptr);
+    // Store defaults and indices into their slots, then pass: PARAM1 = table base, PARAM2 = defaults slot, PARAM3 = indices slot, PARAM4 = table_size.
+    code.movaps(xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16], defaults);
+    code.movaps(xword[rsp + ABI_SHADOW_SPACE + (table_size + 1) * 16], indicies);
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
+    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + (table_size + 1) * 16]);
+    code.mov(code.ABI_PARAM4.cvt32(), table_size);
+    // Fallback: the second parameter holds the defaults on entry and is overwritten lane by lane, so lanes whose index is past the table keep their default.
+    code.CallFunction(static_cast<void(*)(const VectorArray<u8>*, VectorArray<u8>&, const VectorArray<u8>&, size_t)>(
+        [](const VectorArray<u8>* table, VectorArray<u8>& result, const VectorArray<u8>& indicies, size_t table_size) {
+            for (size_t i = 0; i < result.size(); ++i) {
+                const size_t index = indicies[i] / table[0].size(); // which table vector
+                const size_t elem = indicies[i] % table[0].size(); // which byte within that vector
+                if (index < table_size) {
+                    result[i] = table[index][elem];
+                }
+            }
+        }
+    ));
+    // The defaults slot now holds the result; load it and release the stack space.
+    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]);
+    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
 static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

--- a/src/backend_x64/reg_alloc.cpp
+++ b/src/backend_x64/reg_alloc.cpp
@ -41,37 +41,47 @@ static bool CanExchange(HostLoc a, HostLoc b) {
 // Minimum number of bits required to represent a type
 static size_t GetBitWidth(IR::Type type) {
    switch (type) {
-        case IR::Type::A32Reg:
-        case IR::Type::A32ExtReg:
-        case IR::Type::A64Reg:
-        case IR::Type::A64Vec:
-        case IR::Type::CoprocInfo:
-        case IR::Type::Cond:
-        case IR::Type::Void:
-            ASSERT_MSG(false, "Type {} cannot be represented at runtime", type);
-            return 0;
-        case IR::Type::Opaque:
-            ASSERT_MSG(false, "Not a concrete type");
-            return 0;
-        case IR::Type::U1:
-            return 8;
-        case IR::Type::U8:
-            return 8;
-        case IR::Type::U16:
-            return 16;
-        case IR::Type::U32:
-            return 32;
-        case IR::Type::U64:
-            return 64;
-        case IR::Type::U128:
-            return 128;
-        case IR::Type::NZCVFlags:
-            return 32; // TODO: Update to 16 when flags optimization is done
+    case IR::Type::A32Reg:
+    case IR::Type::A32ExtReg:
+    case IR::Type::A64Reg:
+    case IR::Type::A64Vec:
+    case IR::Type::CoprocInfo:
+    case IR::Type::Cond:
+    case IR::Type::Void:
+    case IR::Type::Table: // Table joins the group of types that trip the "cannot be represented at runtime" assertion
+        ASSERT_MSG(false, "Type {} cannot be represented at runtime", type);
+        return 0;
+    case IR::Type::Opaque:
+        ASSERT_MSG(false, "Not a concrete type");
+        return 0;
+    case IR::Type::U1:
+        return 8; // U1 is reported with the same width as U8
+    case IR::Type::U8:
+        return 8;
+    case IR::Type::U16:
+        return 16;
+    case IR::Type::U32:
+        return 32;
+    case IR::Type::U64:
+        return 64;
+    case IR::Type::U128:
+        return 128;
+    case IR::Type::NZCVFlags:
+        return 32; // TODO: Update to 16 when flags optimization is done
    }
    UNREACHABLE();
    return 0;
 }

+// Reports whether `type` is one that GetArgumentInfo skips: for such arguments it
+// neither looks up a host location nor adds an argument reference.
+static bool IsValuelessType(IR::Type type) {
+    if (type == IR::Type::Table) {
+        return true;
+    }
+    return false;
+}
+
 bool HostLocInfo::IsLocked() const {
    return is_being_used;
 }
@ -139,6 +149,10 @@ bool Argument::IsImmediate() const {
    return value.IsImmediate();
 }

+// Reports whether this argument's IR type is Void.
+bool Argument::IsVoid() const {
+    const IR::Type arg_type = GetType();
+    return arg_type == IR::Type::Void;
+}
+
 bool Argument::FitsInImmediateU32() const {
    if (!IsImmediate())
        return false;
@ -209,11 +223,11 @@ bool Argument::IsInMemory() const {
 }

 RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
-    ArgumentInfo ret = { Argument{*this}, Argument{*this}, Argument{*this} };
+    ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
    for (size_t i = 0; i < inst->NumArgs(); i++) {
        const IR::Value& arg = inst->GetArg(i);
        ret[i].value = arg;
-        if (!arg.IsImmediate()) {
+        if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
            ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
            LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
        }
--- a/src/backend_x64/reg_alloc.h
+++ b/src/backend_x64/reg_alloc.h
@ -61,6 +61,7 @@ struct Argument {
 public:
    IR::Type GetType() const;
    bool IsImmediate() const;
+    bool IsVoid() const;

    bool FitsInImmediateU32() const;
    bool FitsInImmediateS32() const;
@ -82,7 +83,7 @@ public:

 private:
    friend class RegAlloc;
-    Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
+    explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} // private constructor (RegAlloc is a friend); explicit rules out implicit conversion from RegAlloc&

    bool allocated = false;
    RegAlloc& reg_alloc;
@ -91,7 +92,7 @@ private:

 class RegAlloc final {
 public:
-    using ArgumentInfo = std::array<Argument, 3>;
+    using ArgumentInfo = std::array<Argument, IR::max_arg_count>;

    explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
        : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1537,6 +1537,16 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
    return {};
 }

+Table IREmitter::VectorTable(std::vector<U128> values) {
+    ASSERT(values.size() >= 1 && values.size() <= 4); // a table is built from one to four vectors
+    values.resize(4); // pad with value-initialized entries so all four operand slots can be passed below
+    return Inst<Table>(Opcode::VectorTable, values[0], values[1], values[2], values[3]);
+}
+
+U128 IREmitter::VectorTableLookup(const U128& defaults, const Table& table, const U128& indices) {
+    return Inst<U128>(Opcode::VectorTableLookup, defaults, table, indices); // operand order matches the opcode's declared argument types: U128, Table, U128
+}
+
 U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) {
    switch (esize) {
    case 8:
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -266,6 +266,8 @@ public:
    U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
    U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
    U128 VectorSub(size_t esize, const U128& a, const U128& b);
+    Table VectorTable(std::vector<U128> values);
+    U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
    U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
    U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
    U128 VectorZeroExtend(size_t original_esize, const U128& a);
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@ -489,7 +489,7 @@ size_t Inst::NumArgs() const {

 Value Inst::GetArg(size_t index) const {
    ASSERT_MSG(index < GetNumArgsOf(op), "Inst::GetArg: index {} >= number of arguments of {} ({})", index, op, GetNumArgsOf(op));
-    ASSERT_MSG(!args[index].IsEmpty(), "Inst::GetArg: index {} is empty", index);
+    // An empty slot is tolerated only where the opcode declares that argument as Opaque.
+    // The message has a single placeholder, so exactly one format argument is supplied.
+    ASSERT_MSG(!args[index].IsEmpty() || GetArgTypeOf(op, index) == IR::Type::Opaque, "Inst::GetArg: index {} is empty", index);

    return args[index];
 }
--- a/src/frontend/ir/microinstruction.h
+++ b/src/frontend/ir/microinstruction.h
@ -17,6 +17,8 @@ namespace Dynarmic::IR {
 enum class Opcode;
 enum class Type;

+constexpr size_t max_arg_count = 4;
+
 /**
 * A representation of a microinstruction. A single ARM/Thumb instruction may be
 * converted into zero or more microinstructions.
@ -136,7 +138,7 @@ private:

    Opcode op;
    size_t use_count = 0;
-    std::array<Value, 3> args;
+    std::array<Value, max_arg_count> args;

    // Pointers to related pseudooperations:
    // Since not all combinations are possible, we use a union to save space
--- a/src/frontend/ir/opcodes.cpp
+++ b/src/frontend/ir/opcodes.cpp
@ -43,6 +43,7 @@ constexpr Type U128 = Type::U128;
 constexpr Type CoprocInfo = Type::CoprocInfo;
 constexpr Type NZCV = Type::NZCVFlags;
 constexpr Type Cond = Type::Cond;
+constexpr Type Table = Type::Table;

 static const std::map<Opcode, Meta> opcode_info {{
 #define OPCODE(name, type, ...) { Opcode::name, { #name, type, { __VA_ARGS__ } } },
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -401,6 +401,8 @@ OPCODE(VectorSub8,                              U128,           U128,
 OPCODE(VectorSub16,                             U128,           U128,           U128                                            )
 OPCODE(VectorSub32,                             U128,           U128,           U128                                            )
 OPCODE(VectorSub64,                             U128,           U128,           U128                                            )
+OPCODE(VectorTable,                             Table,          U128,           Opaque,         Opaque,         Opaque          )
+OPCODE(VectorTableLookup,                       U128,           U128,           Table,          U128                            )
 OPCODE(VectorUnsignedAbsoluteDifference8,       U128,           U128,           U128                                            )
 OPCODE(VectorUnsignedAbsoluteDifference16,      U128,           U128,           U128                                            )
 OPCODE(VectorUnsignedAbsoluteDifference32,      U128,           U128,           U128                                            )
--- a/src/frontend/ir/type.cpp
+++ b/src/frontend/ir/type.cpp
@ -16,13 +16,23 @@
 namespace Dynarmic::IR {

 std::string GetNameOf(Type type) {
-    static const std::array<const char*, 16> names = {
-        "Void", "A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "F32", "F64", "CoprocInfo", "NZCVFlags", "Cond"
-    };
-    const size_t index = static_cast<size_t>(type);
-    if (index > names.size())
-        return fmt::format("Unknown Type {}", index);
-    return names.at(index);
+    // bit_names[i] is the label emitted when bit i of the type value is set.
+    static const std::array<const char*, 15> bit_names{"A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "U128", "CoprocInfo", "NZCVFlags", "Cond", "Table"};
+
+    const size_t mask = static_cast<size_t>(type);
+    if (mask == 0) {
+        return "Void";
+    }
+
+    // Join the labels of every set bit, lowest bit first, separated by '|'.
+    std::string joined;
+    for (size_t bit = 0; bit < bit_names.size(); ++bit) {
+        if (((mask >> bit) & size_t(1)) == 0) {
+            continue;
+        }
+        if (!joined.empty()) {
+            joined += '|';
+        }
+        joined += bit_names[bit];
+    }
+    return joined;
 }

 bool AreTypesCompatible(Type t1, Type t2) {
--- a/src/frontend/ir/type.h
+++ b/src/frontend/ir/type.h
@ -32,6 +32,7 @@ enum class Type {
    CoprocInfo = 1 << 11,
    NZCVFlags = 1 << 12,
    Cond = 1 << 13,
+    Table = 1 << 14,
 };

 constexpr Type operator|(Type a, Type b) {
--- a/src/frontend/ir/value.h
+++ b/src/frontend/ir/value.h
@ -103,5 +103,6 @@ using U32U64 = TypedValue<Type::U32 | Type::U64>;
 using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
 using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
 using NZCV = TypedValue<Type::NZCVFlags>;
+using Table = TypedValue<Type::Table>;

 } // namespace Dynarmic::IR