From 89d08c7d615d2e85ad8ea12fe4ff0c4b050d9fbe Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 18 Aug 2018 21:08:34 +0100 Subject: [PATCH] IR: Add VectorTable and VectorTableLookup IR instructions --- src/backend_x64/emit_x64_vector.cpp | 50 ++++++++++++++++++++ src/backend_x64/reg_alloc.cpp | 70 +++++++++++++++++----------- src/backend_x64/reg_alloc.h | 5 +- src/frontend/ir/ir_emitter.cpp | 10 ++++ src/frontend/ir/ir_emitter.h | 2 + src/frontend/ir/microinstruction.cpp | 2 +- src/frontend/ir/microinstruction.h | 4 +- src/frontend/ir/opcodes.cpp | 1 + src/frontend/ir/opcodes.inc | 2 + src/frontend/ir/type.cpp | 24 +++++++--- src/frontend/ir/type.h | 1 + src/frontend/ir/value.h | 1 + 12 files changed, 133 insertions(+), 39 deletions(-) diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp index 23e949bb..9e3cb6bc 100644 --- a/src/backend_x64/emit_x64_vector.cpp +++ b/src/backend_x64/emit_x64_vector.cpp @@ -2696,6 +2696,56 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq); } +void EmitX64::EmitVectorTable(EmitContext&, IR::Inst* inst) { + // Do nothing. We *want* to hold on to the refcount for our arguments, so VectorTableLookup can use our arguments. + ASSERT_MSG(inst->UseCount() == 1, "Table cannot be used multiple times"); +} + +void EmitX64::EmitVectorTableLookup(EmitContext& ctx, IR::Inst* inst) { + ASSERT(inst->GetArg(1).GetInst()->GetOpcode() == IR::Opcode::VectorTable); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + auto table = ctx.reg_alloc.GetArgumentInfo(inst->GetArg(1).GetInst()); + + const size_t table_size = std::count_if(table.begin(), table.end(), [](const auto& elem){ return !elem.IsVoid(); }); + + const u32 stack_space = static_cast((table_size + 2) * 16); + code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + for (size_t i = 0; i < table_size; ++i) { + const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]); + code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value); + } + const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + ctx.reg_alloc.EndOfAllocScope(); + ctx.reg_alloc.HostCall(nullptr); + + code.movaps(xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16], defaults); + code.movaps(xword[rsp + ABI_SHADOW_SPACE + (table_size + 1) * 16], indicies); + code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); + code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); + code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + (table_size + 1) * 16]); + code.mov(code.ABI_PARAM4.cvt32(), table_size); + + code.CallFunction(static_cast*, VectorArray&, const VectorArray&, size_t)>( + [](const VectorArray* table, VectorArray& result, const VectorArray& indicies, size_t table_size) { + for (size_t i = 0; i < result.size(); ++i) { + const size_t index = indicies[i] / table[0].size(); + const size_t elem = indicies[i] % table[0].size(); + if (index < table_size) { + result[i] = table[index][elem]; + } + } + } + )); + + code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); + code.add(rsp, stack_space + ABI_SHADOW_SPACE); + + ctx.reg_alloc.DefineValue(inst, result); +} + static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); diff --git a/src/backend_x64/reg_alloc.cpp b/src/backend_x64/reg_alloc.cpp index 78810998..83ab5bee 100644 --- a/src/backend_x64/reg_alloc.cpp +++ b/src/backend_x64/reg_alloc.cpp @@ -41,37 +41,47 @@ static bool CanExchange(HostLoc a, HostLoc b) { // Minimum number of bits required to represent a type static size_t GetBitWidth(IR::Type type) { switch (type) { - case IR::Type::A32Reg: - case IR::Type::A32ExtReg: - case IR::Type::A64Reg: - case IR::Type::A64Vec: - case IR::Type::CoprocInfo: - case IR::Type::Cond: - case IR::Type::Void: - ASSERT_MSG(false, "Type {} cannot be represented at runtime", type); - return 0; - case IR::Type::Opaque: - ASSERT_MSG(false, "Not a concrete type"); - return 0; - case IR::Type::U1: - return 8; - case IR::Type::U8: - return 8; - case IR::Type::U16: - return 16; - case IR::Type::U32: - return 32; - case IR::Type::U64: - return 64; - case IR::Type::U128: - return 128; - case IR::Type::NZCVFlags: - return 32; // TODO: Update to 16 when flags optimization is done + case IR::Type::A32Reg: + case IR::Type::A32ExtReg: + case IR::Type::A64Reg: + case IR::Type::A64Vec: + case IR::Type::CoprocInfo: + case IR::Type::Cond: + case IR::Type::Void: + case IR::Type::Table: + ASSERT_MSG(false, "Type {} cannot be represented at runtime", type); + return 0; + case IR::Type::Opaque: + ASSERT_MSG(false, "Not a concrete type"); + return 0; + case IR::Type::U1: + return 8; + case IR::Type::U8: + return 8; + case IR::Type::U16: + return 16; + case IR::Type::U32: + return 32; + case IR::Type::U64: + return 64; + case IR::Type::U128: + return 128; + case IR::Type::NZCVFlags: + return 32; // TODO: Update to 16 when flags optimization is done } UNREACHABLE(); return 0; } +static bool IsValuelessType(IR::Type type) { + switch (type) { + case IR::Type::Table: + return true; + default: + return false; + } +} + bool HostLocInfo::IsLocked() const { return is_being_used; } @@ -139,6 +149,10 @@ bool Argument::IsImmediate() const { return value.IsImmediate(); } +bool Argument::IsVoid() const { + return GetType() == IR::Type::Void; +} + bool Argument::FitsInImmediateU32() const { if (!IsImmediate()) return false; @@ -209,11 +223,11 @@ bool Argument::IsInMemory() const { } RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) { - ArgumentInfo ret = { Argument{*this}, Argument{*this}, Argument{*this} }; + ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}}; for (size_t i = 0; i < inst->NumArgs(); i++) { const IR::Value& arg = inst->GetArg(i); ret[i].value = arg; - if (!arg.IsImmediate()) { + if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) { ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined"); LocInfo(*ValueLocation(arg.GetInst())).AddArgReference(); } diff --git a/src/backend_x64/reg_alloc.h b/src/backend_x64/reg_alloc.h index ac352167..86bd9766 100644 --- a/src/backend_x64/reg_alloc.h +++ b/src/backend_x64/reg_alloc.h @@ -61,6 +61,7 @@ struct Argument { public: IR::Type GetType() const; bool IsImmediate() const; + bool IsVoid() const; bool FitsInImmediateU32() const; bool FitsInImmediateS32() const; @@ -82,7 +83,7 @@ public: private: friend class RegAlloc; - Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} + explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {} bool allocated = false; RegAlloc& reg_alloc; @@ -91,7 +92,7 @@ private: class RegAlloc final { public: - using ArgumentInfo = std::array; + using ArgumentInfo = std::array; explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function spill_to_addr) : hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {} diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index ac2ad63a..10a99d9e 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1537,6 +1537,16 @@ U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) { return {}; } +Table IREmitter::VectorTable(std::vector values) { + ASSERT(values.size() >= 1 && values.size() <= 4); + values.resize(4); + return Inst(Opcode::VectorTable, values[0], values[1], values[2], values[3]); +} + +U128 IREmitter::VectorTableLookup(const U128& defaults, const Table& table, const U128& indices) { + return Inst(Opcode::VectorTableLookup, defaults, table, indices); +} + U128 IREmitter::VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b) { switch (esize) { case 8: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index e33ac743..570281db 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -266,6 +266,8 @@ public: U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a); U128 VectorSub(size_t esize, const U128& a, const U128& b); + Table VectorTable(std::vector values); + U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices); U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b); U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a); U128 VectorZeroExtend(size_t original_esize, const U128& a); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index 4fbce11d..0d56248f 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -489,7 +489,7 @@ size_t Inst::NumArgs() const { Value Inst::GetArg(size_t index) const { ASSERT_MSG(index < GetNumArgsOf(op), "Inst::GetArg: index {} >= number of arguments of {} ({})", index, op, GetNumArgsOf(op)); - ASSERT_MSG(!args[index].IsEmpty(), "Inst::GetArg: index {} is empty", index); + ASSERT_MSG(!args[index].IsEmpty() || GetArgTypeOf(op, index) == IR::Type::Opaque, "Inst::GetArg: index {} is empty", index, args[index].GetType()); return args[index]; } diff --git a/src/frontend/ir/microinstruction.h b/src/frontend/ir/microinstruction.h index 3be31c2d..ee2d2602 100644 --- a/src/frontend/ir/microinstruction.h +++ b/src/frontend/ir/microinstruction.h @@ -17,6 +17,8 @@ namespace Dynarmic::IR { enum class Opcode; enum class Type; +constexpr size_t max_arg_count = 4; + /** * A representation of a microinstruction. A single ARM/Thumb instruction may be * converted into zero or more microinstructions. @@ -136,7 +138,7 @@ private: Opcode op; size_t use_count = 0; - std::array args; + std::array args; // Pointers to related pseudooperations: // Since not all combinations are possible, we use a union to save space diff --git a/src/frontend/ir/opcodes.cpp b/src/frontend/ir/opcodes.cpp index 8e86d328..af038bac 100644 --- a/src/frontend/ir/opcodes.cpp +++ b/src/frontend/ir/opcodes.cpp @@ -43,6 +43,7 @@ constexpr Type U128 = Type::U128; constexpr Type CoprocInfo = Type::CoprocInfo; constexpr Type NZCV = Type::NZCVFlags; constexpr Type Cond = Type::Cond; +constexpr Type Table = Type::Table; static const std::map opcode_info {{ #define OPCODE(name, type, ...) { Opcode::name, { #name, type, { __VA_ARGS__ } } }, diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 4f4b820b..87f025b1 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -401,6 +401,8 @@ OPCODE(VectorSub8, U128, U128, OPCODE(VectorSub16, U128, U128, U128 ) OPCODE(VectorSub32, U128, U128, U128 ) OPCODE(VectorSub64, U128, U128, U128 ) +OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque ) +OPCODE(VectorTableLookup, U128, U128, Table, U128 ) OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 ) OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 ) OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 ) diff --git a/src/frontend/ir/type.cpp b/src/frontend/ir/type.cpp index 71eff356..f1ac93e5 100644 --- a/src/frontend/ir/type.cpp +++ b/src/frontend/ir/type.cpp @@ -16,13 +16,23 @@ namespace Dynarmic::IR { std::string GetNameOf(Type type) { - static const std::array names = { - "Void", "A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "F32", "F64", "CoprocInfo", "NZCVFlags", "Cond" - }; - const size_t index = static_cast(type); - if (index > names.size()) - return fmt::format("Unknown Type {}", index); - return names.at(index); + static const std::array names{"A32Reg", "A32ExtReg", "A64Reg", "A64Vec", "Opaque", "U1", "U8", "U16", "U32", "U64", "U128", "CoprocInfo", "NZCVFlags", "Cond", "Table"}; + + const size_t bits = static_cast(type); + if (bits == 0) { + return "Void"; + } + + std::string result; + for (size_t i = 0; i < names.size(); i++) { + if ((bits & (size_t(1) << i)) != 0) { + if (!result.empty()) { + result += '|'; + } + result += names[i]; + } + } + return result; } bool AreTypesCompatible(Type t1, Type t2) { diff --git a/src/frontend/ir/type.h b/src/frontend/ir/type.h index 593a9918..db60459e 100644 --- a/src/frontend/ir/type.h +++ b/src/frontend/ir/type.h @@ -32,6 +32,7 @@ enum class Type { CoprocInfo = 1 << 11, NZCVFlags = 1 << 12, Cond = 1 << 13, + Table = 1 << 14, }; constexpr Type operator|(Type a, Type b) { diff --git a/src/frontend/ir/value.h b/src/frontend/ir/value.h index 552a1bed..2c946291 100644 --- a/src/frontend/ir/value.h +++ b/src/frontend/ir/value.h @@ -103,5 +103,6 @@ using U32U64 = TypedValue; using UAny = TypedValue; using UAnyU128 = TypedValue; using NZCV = TypedValue; +using Table = TypedValue; } // namespace Dynarmic::IR