diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 82248e3c..48c1c673 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -63,6 +63,7 @@ add_library(dynarmic frontend/A64/translate/impl/data_processing_addsub.cpp frontend/A64/translate/impl/data_processing_bitfield.cpp frontend/A64/translate/impl/data_processing_conditional_select.cpp + frontend/A64/translate/impl/data_processing_crc32.cpp frontend/A64/translate/impl/data_processing_logical.cpp frontend/A64/translate/impl/data_processing_multiply.cpp frontend/A64/translate/impl/data_processing_pcrel.cpp @@ -133,6 +134,7 @@ if (ARCHITECTURE_x86_64) backend_x64/constant_pool.h backend_x64/emit_x64.cpp backend_x64/emit_x64.h + backend_x64/emit_x64_crc32.cpp backend_x64/emit_x64_data_processing.cpp backend_x64/emit_x64_floating_point.cpp backend_x64/emit_x64_packed.cpp diff --git a/src/backend_x64/emit_x64_crc32.cpp b/src/backend_x64/emit_x64_crc32.cpp new file mode 100644 index 00000000..3c44b56f --- /dev/null +++ b/src/backend_x64/emit_x64_crc32.cpp @@ -0,0 +1,130 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. 
+ */
+
+#include <array>
+#include <climits>
+
+#include "backend_x64/block_of_code.h"
+#include "backend_x64/emit_x64.h"
+#include "common/common_types.h"
+#include "frontend/ir/microinstruction.h"
+#include "frontend/ir/opcodes.h"
+
+namespace Dynarmic::BackendX64 {
+
+using namespace Xbyak::util;
+
+// CRC-32C (Castagnoli, polynomial 0x1EDC6F41) byte-wise lookup table in reflected form: entry 0x80 is the bit-reversed polynomial 0x82F63B78.
+constexpr std::array<u32, 256> castagnoli_table{{
+    0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+    0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+    0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+    0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+    0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+    0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+    0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+    0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+    0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+    0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+    0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+    0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+    0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+    0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+    0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+    0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+    0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+    0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+    0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+    0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+    0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+    0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+    0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+    0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+    0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+    0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+    0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+    0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+    0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+    0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+    0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+    0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+    0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+    0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+    0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+    0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+    0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+    0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+    0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+    0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+    0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+    0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+    0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+    0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+    0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+    0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+    0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+    0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+    0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+    0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+    0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+    0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+    0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+    0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+    0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+    0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+    0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+    0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+    0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+    0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+    0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+    0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+    0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+    0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+}};
+
+// Software fallback: folds `length` bytes of `value` (lowest-addressed byte first) into `crc` via `table` and returns the result; no initial/final inversion is applied.
+static u32 ComputeCRC32(const std::array<u32, 256>& table, u32 crc, const u64 value, int length) {
+    const auto* data = reinterpret_cast<const unsigned char*>(&value); // view the operand as raw bytes in host memory order
+    while (length-- > 0) { // `length` is a byte count; a non-positive count leaves crc unchanged instead of reading past `value`
+        crc = (crc >> 8) ^ table[(crc ^ (*data++)) & 0xFF];
+    }
+    return crc;
+}
+
+// Emits host code for one CRC32Castagnoli IR instruction whose data operand is `data_size` bits wide (8, 16, 32 or 64 from the wrappers below).
+// Uses the SSE4.2 crc32 instruction when the host CPU reports support; otherwise emits a call to ComputeCRC32 with castagnoli_table.
+static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
+        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); // running CRC, updated in place
+        const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size); // data operand narrowed/widened to data_size bits
+        code.crc32(crc, value); // NOTE(review): the 64-bit ISA form is CRC32 r64, r/m64 - confirm the pinned Xbyak accepts a 32-bit destination here
+        ctx.reg_alloc.DefineValue(inst, crc);
+    } else {
+        ctx.reg_alloc.HostCall(inst, {}, args[0], args[1], {}); // presumably binds crc/value to the 2nd/3rd parameters - verify against RegAlloc::HostCall
+        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&castagnoli_table)); // 1st parameter: address of the lookup table
+        code.mov(code.ABI_PARAM4, data_size / CHAR_BIT); // 4th parameter: operand length in bytes
+        code.CallFunction(&ComputeCRC32);
+    }
+}
+
+void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
+    EmitCRC32Castagnoli(*code, ctx, inst, 8);
+}
+
+void EmitX64::EmitCRC32Castagnoli16(EmitContext& ctx, IR::Inst* inst) {
+    EmitCRC32Castagnoli(*code, ctx, inst, 16);
+}
+
+void EmitX64::EmitCRC32Castagnoli32(EmitContext& ctx, IR::Inst* inst) {
+    EmitCRC32Castagnoli(*code, ctx, inst, 32);
+}
+
+void EmitX64::EmitCRC32Castagnoli64(EmitContext& ctx, IR::Inst* inst) {
+    EmitCRC32Castagnoli(*code, ctx, inst, 64);
+}
+
+} // namespace Dynarmic::BackendX64
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 8ab56709..2820413d 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -275,7 +275,7 @@ INST(LSRV, "LSRV", "z0011
 INST(ASRV, "ASRV", "z0011010110mmmmm001010nnnnnddddd")
 INST(RORV, "RORV", "z0011010110mmmmm001011nnnnnddddd")
 //INST(CRC32, "CRC32B, CRC32H, CRC32W, CRC32X", "z0011010110mmmmm0100zznnnnnddddd")
-//INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd")
+INST(CRC32C, "CRC32CB, CRC32CH, CRC32CW, CRC32CX", "z0011010110mmmmm0101zznnnnnddddd")
 //INST(PACGA, "PACGA", "10011010110mmmmm001100nnnnnddddd")
 
 // Data Processing - Register - 1 source
diff --git a/src/frontend/A64/translate/impl/data_processing_crc32.cpp b/src/frontend/A64/translate/impl/data_processing_crc32.cpp
new file mode 100644
index 00000000..66430a8b
--- /dev/null
+++ b/src/frontend/A64/translate/impl/data_processing_crc32.cpp
@@
-0,0 +1,47 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "frontend/A64/translate/impl/impl.h"
+
+namespace Dynarmic::A64 {
+
+// Translates the A64 CRC32C encoding (CRC32CB/CRC32CH/CRC32CW/CRC32CX) to a CRC32Castagnoli{8,16,32,64} IR op.
+// sf must be set exactly when sz == 0b11; any other combination is reported via UnallocatedEncoding().
+// On success the 32-bit result is written to Rd and true is returned.
+bool TranslatorVisitor::CRC32C(bool sf, Reg Rm, Imm<2> sz, Reg Rn, Reg Rd) {
+    const u32 integral_size = sz.ZeroExtend();
+
+    if (sf && integral_size != 0b11) { // 64-bit form is only valid with the 64-bit data size
+        return UnallocatedEncoding();
+    }
+
+    if (!sf && integral_size == 0b11) { // 64-bit data size is only valid with the 64-bit form
+        return UnallocatedEncoding();
+    }
+
+    const IR::U32 result = [&] {
+        const size_t datasize = sf ? 64 : 32;
+        const IR::U32 accumulator = ir.GetW(Rn); // running CRC comes from the 32-bit view of Rn
+        const IR::U32U64 data = X(datasize, Rm); // data is read from Rm at 32 or 64 bits depending on sf
+
+        switch (integral_size) {
+        case 0b00:
+            return ir.CRC32Castagnoli8(accumulator, data);
+        case 0b01:
+            return ir.CRC32Castagnoli16(accumulator, data);
+        case 0b10:
+            return ir.CRC32Castagnoli32(accumulator, data);
+        case 0b11:
+        default: // sz is a 2-bit field, so no value other than 0b11 is expected to reach this label
+            return ir.CRC32Castagnoli64(accumulator, data);
+        }
+    }();
+
+    X(32, Rd, result);
+    return true;
+}
+
+} // namespace Dynarmic::A64
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index d138c17c..efd59076 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -683,6 +683,23 @@ U32 IREmitter::PackedSelect(const U32& ge, const U32& a, const U32& b) {
     return Inst<U32>(Opcode::PackedSelect, ge, a, b);
 }
 
+// CRC32C builders: `a` is the running CRC and `b` the data to fold in (as passed by TranslatorVisitor::CRC32C).
+U32 IREmitter::CRC32Castagnoli8(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::CRC32Castagnoli8, a, b);
+}
+
+U32 IREmitter::CRC32Castagnoli16(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::CRC32Castagnoli16, a, b);
+}
+
+U32 IREmitter::CRC32Castagnoli32(const U32& a, const U32& b) {
+    return Inst<U32>(Opcode::CRC32Castagnoli32, a, b);
+}
+
+U32 IREmitter::CRC32Castagnoli64(const U32& a, const U64& b) {
+    return Inst<U32>(Opcode::CRC32Castagnoli64, a, b);
+}
+
 UAny IREmitter::VectorGetElement(size_t esize, const U128& a, size_t index) {
ASSERT_MSG(esize * index < 128, "Invalid index"); switch (esize) { diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 3c40d8fa..b97f1d18 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -186,6 +186,11 @@ public: U32 PackedAbsDiffSumS8(const U32& a, const U32& b); U32 PackedSelect(const U32& ge, const U32& a, const U32& b); + U32 CRC32Castagnoli8(const U32& a, const U32& b); + U32 CRC32Castagnoli16(const U32& a, const U32& b); + U32 CRC32Castagnoli32(const U32& a, const U32& b); + U32 CRC32Castagnoli64(const U32& a, const U64& b); + UAny VectorGetElement(size_t esize, const U128& a, size_t index); U128 VectorAdd8(const U128& a, const U128& b); U128 VectorAdd16(const U128& a, const U128& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 3e8af4a6..f408a44b 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -167,6 +167,12 @@ OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 ) OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 ) +// CRC instructions +OPCODE(CRC32Castagnoli8, T::U32, T::U32, T::U32 ) +OPCODE(CRC32Castagnoli16, T::U32, T::U32, T::U32 ) +OPCODE(CRC32Castagnoli32, T::U32, T::U32, T::U32 ) +OPCODE(CRC32Castagnoli64, T::U32, T::U32, T::U64 ) + // Vector instructions OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 ) OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )