From bb713194a0d04bd6c49e444b611adbb7e31f312b Mon Sep 17 00:00:00 2001 From: merry Date: Sun, 20 Mar 2022 12:17:10 +0000 Subject: [PATCH] backend/x64: Implement SHA256 polyfills --- src/dynarmic/CMakeLists.txt | 1 + src/dynarmic/backend/x64/a32_interface.cpp | 9 ++ src/dynarmic/backend/x64/a64_interface.cpp | 11 +- src/dynarmic/ir/opt/passes.h | 7 + src/dynarmic/ir/opt/polyfill_pass.cpp | 175 +++++++++++++++++++++ 5 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 src/dynarmic/ir/opt/polyfill_pass.cpp diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 53a16f83..c6c9060f 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -92,6 +92,7 @@ add_library(dynarmic ir/opt/identity_removal_pass.cpp ir/opt/ir_matcher.h ir/opt/passes.h + ir/opt/polyfill_pass.cpp ir/opt/verification_pass.cpp ir/terminal.h ir/type.cpp diff --git a/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/backend/x64/a32_interface.cpp index 49c7e8d0..3d7997b4 100644 --- a/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/backend/x64/a32_interface.cpp @@ -50,16 +50,24 @@ static std::function GenRCP(const A32::UserConfig& conf) { }; } +static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) { + return Optimization::PolyfillOptions{ + .sha256 = !code.HasHostFeature(HostFeature::SHA), + }; +} + struct Jit::Impl { Impl(Jit* jit, A32::UserConfig conf) : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) , emitter(block_of_code, conf, jit) + , polyfill_options(GenPolyfillOptions(block_of_code)) , conf(std::move(conf)) , jit_interface(jit) {} A32JitState jit_state; BlockOfCode block_of_code; A32EmitX64 emitter; + Optimization::PolyfillOptions polyfill_options; const A32::UserConfig conf; @@ -154,6 +162,7 @@ private: } IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); + Optimization::PolyfillPass(ir_block, polyfill_options); if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { Optimization::A32GetSetElimination(ir_block); Optimization::DeadCodeElimination(ir_block); diff --git a/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/backend/x64/a64_interface.cpp index c19c4939..abcc2727 100644 --- a/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/backend/x64/a64_interface.cpp @@ -45,12 +45,19 @@ static std::function GenRCP(const A64::UserConfig& conf) { }; } +static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) { + return Optimization::PolyfillOptions{ + .sha256 = !code.HasHostFeature(HostFeature::SHA), + }; +} + struct Jit::Impl final { public: Impl(Jit* jit, UserConfig conf) : conf(conf) , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) - , emitter(block_of_code, conf, jit) { + , emitter(block_of_code, conf, jit) + , polyfill_options(GenPolyfillOptions(block_of_code)) { ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64); } @@ -253,6 +260,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); + Optimization::PolyfillPass(ir_block, polyfill_options); Optimization::A64CallbackConfigPass(ir_block, conf); if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { Optimization::A64GetSetElimination(ir_block); @@ -301,6 +309,7 @@ private: A64JitState jit_state; BlockOfCode block_of_code; A64EmitX64 emitter; + Optimization::PolyfillOptions polyfill_options; bool invalidate_entire_cache = false; boost::icl::interval_set invalid_cache_ranges; diff --git a/src/dynarmic/ir/opt/passes.h b/src/dynarmic/ir/opt/passes.h index fb19017c..ad5d09df 100644 --- a/src/dynarmic/ir/opt/passes.h +++ b/src/dynarmic/ir/opt/passes.h @@ -20,6 +20,13 @@ class Block; namespace Dynarmic::Optimization { +struct PolyfillOptions { + bool sha256 = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); void A32GetSetElimination(IR::Block& block); void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); diff --git a/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/ir/opt/polyfill_pass.cpp new file mode 100644 index 00000000..77800ddc --- /dev/null +++ b/src/dynarmic/ir/opt/polyfill_pass.cpp @@ -0,0 +1,175 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/ir_emitter.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt/passes.h" + +namespace Dynarmic::Optimization { + +namespace { + +void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorShuffleWords(z, 0b01001110); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorShuffleWords(x, 0b01001110); + const IR::U128 shuffled_T0 = ir.VectorShuffleWords(T0, 0b01001110); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorShuffleWords(x, 0b10010011); + const IR::U128 shuffled_y = ir.VectorShuffleWords(y, 0b10010011); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? x : y); +} + +} // namespace + +void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPoint(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + default: + break; + } + } +} + +} // namespace Dynarmic::Optimization