backend/x64: Implement SHA256 polyfills

This commit is contained in:
merry 2022-03-20 12:17:10 +00:00
parent 98cff8dd0d
commit bb713194a0
5 changed files with 202 additions and 1 deletions

View file

@ -92,6 +92,7 @@ add_library(dynarmic
ir/opt/identity_removal_pass.cpp ir/opt/identity_removal_pass.cpp
ir/opt/ir_matcher.h ir/opt/ir_matcher.h
ir/opt/passes.h ir/opt/passes.h
ir/opt/polyfill_pass.cpp
ir/opt/verification_pass.cpp ir/opt/verification_pass.cpp
ir/terminal.h ir/terminal.h
ir/type.cpp ir/type.cpp

View file

@ -50,16 +50,24 @@ static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& conf) {
}; };
} }
// Derives the polyfill configuration from the host CPU: SHA256 polyfilling is
// requested exactly when `code` reports that the host lacks HostFeature::SHA.
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
    Optimization::PolyfillOptions options{};
    options.sha256 = !code.HasHostFeature(HostFeature::SHA);
    return options;
}
struct Jit::Impl { struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig conf) Impl(Jit* jit, A32::UserConfig conf)
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) : block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) , emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code))
, conf(std::move(conf)) , conf(std::move(conf))
, jit_interface(jit) {} , jit_interface(jit) {}
A32JitState jit_state; A32JitState jit_state;
BlockOfCode block_of_code; BlockOfCode block_of_code;
A32EmitX64 emitter; A32EmitX64 emitter;
Optimization::PolyfillOptions polyfill_options;
const A32::UserConfig conf; const A32::UserConfig conf;
@ -154,6 +162,7 @@ private:
} }
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
Optimization::PolyfillPass(ir_block, polyfill_options);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A32GetSetElimination(ir_block); Optimization::A32GetSetElimination(ir_block);
Optimization::DeadCodeElimination(ir_block); Optimization::DeadCodeElimination(ir_block);

View file

@ -45,12 +45,19 @@ static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
}; };
} }
// Builds the PolyfillOptions for this JIT: the sha256 polyfill is switched on
// only if the host does not report HostFeature::SHA.
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
    const bool host_has_sha = code.HasHostFeature(HostFeature::SHA);

    Optimization::PolyfillOptions result{};
    result.sha256 = !host_has_sha;
    return result;
}
struct Jit::Impl final { struct Jit::Impl final {
public: public:
Impl(Jit* jit, UserConfig conf) Impl(Jit* jit, UserConfig conf)
: conf(conf) : conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf)) , block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit) { , emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code)) {
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64); ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
} }
@ -253,6 +260,7 @@ private:
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
Optimization::PolyfillPass(ir_block, polyfill_options);
Optimization::A64CallbackConfigPass(ir_block, conf); Optimization::A64CallbackConfigPass(ir_block, conf);
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
Optimization::A64GetSetElimination(ir_block); Optimization::A64GetSetElimination(ir_block);
@ -301,6 +309,7 @@ private:
A64JitState jit_state; A64JitState jit_state;
BlockOfCode block_of_code; BlockOfCode block_of_code;
A64EmitX64 emitter; A64EmitX64 emitter;
Optimization::PolyfillOptions polyfill_options;
bool invalidate_entire_cache = false; bool invalidate_entire_cache = false;
boost::icl::interval_set<u64> invalid_cache_ranges; boost::icl::interval_set<u64> invalid_cache_ranges;

View file

@ -20,6 +20,13 @@ class Block;
namespace Dynarmic::Optimization { namespace Dynarmic::Optimization {
// Selects which IR instructions PolyfillPass rewrites into sequences of other IR operations.
// A default-constructed value requests no polyfills; PolyfillPass returns immediately for it.
struct PolyfillOptions {
// When true, SHA256MessageSchedule0, SHA256MessageSchedule1 and SHA256Hash are polyfilled.
bool sha256 = false;
// Member-wise equality (C++20 defaulted comparison); PolyfillPass uses it to detect the
// "nothing enabled" case by comparing against PolyfillOptions{}.
bool operator==(const PolyfillOptions&) const = default;
};
// Applies the polyfills enabled in `opt` to `block`, modifying the block in place.
void PolyfillPass(IR::Block& block, const PolyfillOptions& opt);
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
void A32GetSetElimination(IR::Block& block); void A32GetSetElimination(IR::Block& block);
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);

View file

@ -0,0 +1,175 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/ir/basic_block.h"
#include "dynarmic/ir/ir_emitter.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/opcodes.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Optimization {
namespace {
// Replaces a SHA256MessageSchedule0 instruction with generic IR.
//
// Operands 0 and 1 of `inst` are read as 128-bit vectors. Every 32-bit lane of
// VectorExtract(op0, op1, 32) is mapped through ror(e, 7) ^ ror(e, 18) ^ (e >> 3)
// (the SHA-256 "small sigma 0" function), the mapped lanes are added lane-wise
// to op0, and all uses of `inst` are redirected to that sum.
//
// ir:   emitter used to create the replacement instructions.
// inst: the instruction whose uses are replaced.
void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
    const auto operand_a = static_cast<IR::U128>(inst.GetArg(0));
    const auto operand_b = static_cast<IR::U128>(inst.GetArg(1));

    const IR::U128 extracted = ir.VectorExtract(operand_a, operand_b, 32);

    // ror 7 ^ ror 18 ^ lsr 3 of one 32-bit word.
    const auto small_sigma0 = [&ir](const IR::U32& word) -> IR::U32 {
        const IR::U32 ror7 = ir.RotateRight(word, ir.Imm8(7));
        const IR::U32 ror18 = ir.RotateRight(word, ir.Imm8(18));
        const IR::U32 lsr3 = ir.LogicalShiftRight(word, ir.Imm8(3));
        return ir.Eor(ror7, ir.Eor(ror18, lsr3));
    };

    // Build the transformed vector one lane at a time, starting from zero.
    IR::U128 accumulated = ir.ZeroVector();
    for (size_t lane = 0; lane < 4; ++lane) {
        const IR::U32 word = ir.VectorGetElement(32, extracted, lane);
        const IR::U32 transformed = small_sigma0(word);
        accumulated = ir.VectorSetElement(32, accumulated, lane, transformed);
    }
    accumulated = ir.VectorAdd(32, accumulated, operand_a);

    inst.ReplaceUsesWith(accumulated);
}
// Replaces a SHA256MessageSchedule1 instruction with generic IR.
//
// Operands 0, 1 and 2 of `inst` are read as 128-bit vectors. The result is
// assembled from two 64-bit halves: the lower half is computed first, and the
// upper half is then derived from that freshly computed lower half. Both halves
// use the lane-wise function ror(e, 17) ^ ror(e, 19) ^ (e >> 10) (the SHA-256
// "small sigma 1" function) on 32-bit elements. All uses of `inst` are
// redirected to the assembled vector.
//
// ir:   emitter used to create the replacement instructions.
// inst: the instruction whose uses are replaced.
void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
    const auto operand_a = static_cast<IR::U128>(inst.GetArg(0));
    const auto operand_b = static_cast<IR::U128>(inst.GetArg(1));
    const auto operand_c = static_cast<IR::U128>(inst.GetArg(2));

    const IR::U128 extracted = ir.VectorExtract(operand_b, operand_c, 32);

    // Lane-wise ror 17 ^ ror 19 ^ lsr 10 over 32-bit elements.
    const auto small_sigma1 = [&ir](const IR::U128& vec) -> IR::U128 {
        const IR::U128 ror17 = ir.VectorRotateRight(32, vec, 17);
        const IR::U128 ror19 = ir.VectorRotateRight(32, vec, 19);
        const IR::U128 lsr10 = ir.VectorLogicalShiftRight(32, vec, 10);
        return ir.VectorEor(ror17, ir.VectorEor(ror19, lsr10));
    };

    // Lower half: sigma1 of the word-shuffled third operand, plus op0 and the extracted vector.
    const IR::U128 shuffled_c = ir.VectorShuffleWords(operand_c, 0b01001110);
    const IR::U128 sigma_of_c = small_sigma1(shuffled_c);
    const IR::U128 lower_sum = ir.VectorAdd(32, sigma_of_c, ir.VectorAdd(32, operand_a, extracted));
    const IR::U128 lower_half = ir.VectorZeroUpper(lower_sum);

    // Upper half: sigma1 of the lower half computed above.
    const IR::U128 sigma_of_lower = small_sigma1(lower_half);
    // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
    const IR::U128 shuffled_a = ir.VectorShuffleWords(operand_a, 0b01001110);
    const IR::U128 shuffled_extracted = ir.VectorShuffleWords(extracted, 0b01001110);
    const IR::U128 upper_sum = ir.VectorAdd(32, sigma_of_lower, ir.VectorAdd(32, shuffled_a, shuffled_extracted));
    const IR::U64 upper_half = ir.VectorGetElement(64, upper_sum, 0);

    // Place the upper half into 64-bit element 1 of the lower-half vector.
    const IR::U128 combined = ir.VectorSetElement(64, lower_half, 1, upper_half);

    inst.ReplaceUsesWith(combined);
}
// Emits IR for ((y ^ z) & x) ^ z.
// Bit for bit this equals (x & y) | (~x & z): where a bit of x is set the
// result takes the bit from y, otherwise it takes the bit from z.
IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
    const IR::U32 differing_bits = ir.Eor(y, z);
    const IR::U32 selected_bits = ir.And(differing_bits, x);
    return ir.Eor(selected_bits, z);
}
// Emits IR for the bitwise majority of x, y and z, computed as (x & y) | ((x | y) & z):
// a result bit is set when at least two of the three corresponding input bits are set.
// NOTE(review): both operands of the outer Or are themselves emitter calls, and C++ does not
// fix which of the two is evaluated first, so the relative order in which those instructions
// are emitted is presumably compiler-dependent. The computed value is the same either way.
IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
}
// Emits IR for ror(x, 2) ^ ror(x, 13) ^ ror(x, 22) on a 32-bit value
// (the SHA-256 "big sigma 0" function).
IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 ror2 = ir.RotateRight(x, ir.Imm8(2));
    const IR::U32 ror13 = ir.RotateRight(x, ir.Imm8(13));
    const IR::U32 ror22 = ir.RotateRight(x, ir.Imm8(22));

    const IR::U32 ror13_xor_ror22 = ir.Eor(ror13, ror22);
    return ir.Eor(ror2, ror13_xor_ror22);
}
// Emits IR for ror(x, 6) ^ ror(x, 11) ^ ror(x, 25) on a 32-bit value
// (the SHA-256 "big sigma 1" function).
IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 ror6 = ir.RotateRight(x, ir.Imm8(6));
    const IR::U32 ror11 = ir.RotateRight(x, ir.Imm8(11));
    const IR::U32 ror25 = ir.RotateRight(x, ir.Imm8(25));

    const IR::U32 ror11_xor_ror25 = ir.Eor(ror11, ror25);
    return ir.Eor(ror6, ror11_xor_ror25);
}
// Replaces a SHA256Hash instruction with generic IR.
//
// Operands 0 and 1 of `inst` are the two 128-bit state vectors, operand 2 is a
// 128-bit vector supplying one 32-bit word per round, and operand 3 is a U1
// read with GetU1() that picks the result: true selects the updated first
// vector, false the updated second vector.
//
// Four rounds are unrolled into the IR. Each round reads all eight 32-bit
// state words, computes the new lowest word for each vector, shuffles both
// vectors by one word and writes the new words into element 0.
//
// ir:   emitter used to create the replacement instructions.
// inst: the instruction whose uses are replaced.
void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
    auto state_x = static_cast<IR::U128>(inst.GetArg(0));
    auto state_y = static_cast<IR::U128>(inst.GetArg(1));
    const auto schedule = static_cast<IR::U128>(inst.GetArg(2));
    const bool part1 = inst.GetArg(3).GetU1();

    // Reads one 32-bit element out of a 128-bit vector.
    const auto word_of = [&ir](const IR::U128& vec, size_t index) -> IR::U32 {
        return ir.VectorGetElement(32, vec, index);
    };

    for (size_t round = 0; round < 4; ++round) {
        const IR::U32 x0 = word_of(state_x, 0);
        const IR::U32 x1 = word_of(state_x, 1);
        const IR::U32 x2 = word_of(state_x, 2);
        const IR::U32 x3 = word_of(state_x, 3);

        const IR::U32 y0 = word_of(state_y, 0);
        const IR::U32 y1 = word_of(state_y, 1);
        const IR::U32 y2 = word_of(state_y, 2);
        const IR::U32 y3 = word_of(state_y, 3);

        const IR::U32 choice = SHAchoose(ir, y0, y1, y2);
        const IR::U32 majority = SHAmajority(ir, x0, x1, x2);

        // t = y3 + SIGMA1(y0) + choice + schedule[round]
        const IR::U32 round_word = word_of(schedule, round);
        const IR::U32 sigma1_y0 = SHAhashSIGMA1(ir, y0);
        const IR::U32 t = ir.Add(y3, ir.Add(sigma1_y0, ir.Add(choice, round_word)));

        const IR::U32 sigma0_x0 = SHAhashSIGMA0(ir, x0);
        const IR::U32 next_x0 = ir.Add(t, ir.Add(sigma0_x0, majority));
        const IR::U32 next_y0 = ir.Add(t, x3);

        // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
        const IR::U128 rotated_x = ir.VectorShuffleWords(state_x, 0b10010011);
        const IR::U128 rotated_y = ir.VectorShuffleWords(state_y, 0b10010011);

        state_x = ir.VectorSetElement(32, rotated_x, 0, next_x0);
        state_y = ir.VectorSetElement(32, rotated_y, 0, next_y0);
    }

    if (part1) {
        inst.ReplaceUsesWith(state_x);
    } else {
        inst.ReplaceUsesWith(state_y);
    }
}
} // namespace
// Rewrites instructions in `block` for which `polyfill` requests a polyfill.
//
// If `polyfill` equals a default-constructed PolyfillOptions the block is left
// untouched. Otherwise every instruction is visited in order; the emitter's
// insertion point is set to the visited instruction, and when the sha256
// option is set, SHA256MessageSchedule0, SHA256MessageSchedule1 and SHA256Hash
// instructions have their uses replaced by equivalent generic IR.
//
// block:    the IR block to modify in place.
// polyfill: which polyfills to apply.
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
    const PolyfillOptions nothing_enabled{};
    if (polyfill == nothing_enabled) {
        return;
    }

    IR::IREmitter ir{block};

    for (auto& inst : block) {
        // Position the emitter at the instruction under inspection so that any
        // replacement IR is created relative to it.
        ir.SetInsertionPoint(&inst);

        const IR::Opcode opcode = inst.GetOpcode();

        if (polyfill.sha256) {
            if (opcode == IR::Opcode::SHA256MessageSchedule0) {
                PolyfillSHA256MessageSchedule0(ir, inst);
            } else if (opcode == IR::Opcode::SHA256MessageSchedule1) {
                PolyfillSHA256MessageSchedule1(ir, inst);
            } else if (opcode == IR::Opcode::SHA256Hash) {
                PolyfillSHA256Hash(ir, inst);
            }
        }
    }
}
} // namespace Dynarmic::Optimization