backend/x64: Implement SHA256 polyfills
This commit is contained in:
parent
98cff8dd0d
commit
bb713194a0
5 changed files with 202 additions and 1 deletions
|
@ -92,6 +92,7 @@ add_library(dynarmic
|
||||||
ir/opt/identity_removal_pass.cpp
|
ir/opt/identity_removal_pass.cpp
|
||||||
ir/opt/ir_matcher.h
|
ir/opt/ir_matcher.h
|
||||||
ir/opt/passes.h
|
ir/opt/passes.h
|
||||||
|
ir/opt/polyfill_pass.cpp
|
||||||
ir/opt/verification_pass.cpp
|
ir/opt/verification_pass.cpp
|
||||||
ir/terminal.h
|
ir/terminal.h
|
||||||
ir/type.cpp
|
ir/type.cpp
|
||||||
|
|
|
@ -50,16 +50,24 @@ static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& conf) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Derives the polyfill configuration from the host CPU features reported by `code`.
// SHA256 polyfilling is requested exactly when the host does not report HostFeature::SHA.
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
    Optimization::PolyfillOptions options{};
    options.sha256 = !code.HasHostFeature(HostFeature::SHA);
    return options;
}
|
||||||
|
|
||||||
struct Jit::Impl {
|
struct Jit::Impl {
|
||||||
Impl(Jit* jit, A32::UserConfig conf)
|
Impl(Jit* jit, A32::UserConfig conf)
|
||||||
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
|
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
|
||||||
, emitter(block_of_code, conf, jit)
|
, emitter(block_of_code, conf, jit)
|
||||||
|
, polyfill_options(GenPolyfillOptions(block_of_code))
|
||||||
, conf(std::move(conf))
|
, conf(std::move(conf))
|
||||||
, jit_interface(jit) {}
|
, jit_interface(jit) {}
|
||||||
|
|
||||||
A32JitState jit_state;
|
A32JitState jit_state;
|
||||||
BlockOfCode block_of_code;
|
BlockOfCode block_of_code;
|
||||||
A32EmitX64 emitter;
|
A32EmitX64 emitter;
|
||||||
|
Optimization::PolyfillOptions polyfill_options;
|
||||||
|
|
||||||
const A32::UserConfig conf;
|
const A32::UserConfig conf;
|
||||||
|
|
||||||
|
@ -154,6 +162,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
|
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
|
||||||
|
Optimization::PolyfillPass(ir_block, polyfill_options);
|
||||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
||||||
Optimization::A32GetSetElimination(ir_block);
|
Optimization::A32GetSetElimination(ir_block);
|
||||||
Optimization::DeadCodeElimination(ir_block);
|
Optimization::DeadCodeElimination(ir_block);
|
||||||
|
|
|
@ -45,12 +45,19 @@ static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the options handed to Optimization::PolyfillPass for this JIT instance.
// The SHA256 polyfill is switched on only if `code` reports that the host has no
// HostFeature::SHA support.
static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code) {
    const bool needs_sha256_polyfill = !code.HasHostFeature(HostFeature::SHA);

    Optimization::PolyfillOptions result{};
    result.sha256 = needs_sha256_polyfill;
    return result;
}
|
||||||
|
|
||||||
struct Jit::Impl final {
|
struct Jit::Impl final {
|
||||||
public:
|
public:
|
||||||
Impl(Jit* jit, UserConfig conf)
|
Impl(Jit* jit, UserConfig conf)
|
||||||
: conf(conf)
|
: conf(conf)
|
||||||
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
|
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
|
||||||
, emitter(block_of_code, conf, jit) {
|
, emitter(block_of_code, conf, jit)
|
||||||
|
, polyfill_options(GenPolyfillOptions(block_of_code)) {
|
||||||
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
|
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -253,6 +260,7 @@ private:
|
||||||
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
|
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
|
||||||
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
|
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
|
||||||
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
|
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
|
||||||
|
Optimization::PolyfillPass(ir_block, polyfill_options);
|
||||||
Optimization::A64CallbackConfigPass(ir_block, conf);
|
Optimization::A64CallbackConfigPass(ir_block, conf);
|
||||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
||||||
Optimization::A64GetSetElimination(ir_block);
|
Optimization::A64GetSetElimination(ir_block);
|
||||||
|
@ -301,6 +309,7 @@ private:
|
||||||
A64JitState jit_state;
|
A64JitState jit_state;
|
||||||
BlockOfCode block_of_code;
|
BlockOfCode block_of_code;
|
||||||
A64EmitX64 emitter;
|
A64EmitX64 emitter;
|
||||||
|
Optimization::PolyfillOptions polyfill_options;
|
||||||
|
|
||||||
bool invalidate_entire_cache = false;
|
bool invalidate_entire_cache = false;
|
||||||
boost::icl::interval_set<u64> invalid_cache_ranges;
|
boost::icl::interval_set<u64> invalid_cache_ranges;
|
||||||
|
|
|
@ -20,6 +20,13 @@ class Block;
|
||||||
|
|
||||||
namespace Dynarmic::Optimization {
|
namespace Dynarmic::Optimization {
|
||||||
|
|
||||||
|
struct PolyfillOptions {
|
||||||
|
bool sha256 = false;
|
||||||
|
|
||||||
|
bool operator==(const PolyfillOptions&) const = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
void PolyfillPass(IR::Block& block, const PolyfillOptions& opt);
|
||||||
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
|
void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb);
|
||||||
void A32GetSetElimination(IR::Block& block);
|
void A32GetSetElimination(IR::Block& block);
|
||||||
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
|
void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf);
|
||||||
|
|
175
src/dynarmic/ir/opt/polyfill_pass.cpp
Normal file
175
src/dynarmic/ir/opt/polyfill_pass.cpp
Normal file
|
@ -0,0 +1,175 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2022 MerryMage
|
||||||
|
* SPDX-License-Identifier: 0BSD
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "dynarmic/ir/basic_block.h"
|
||||||
|
#include "dynarmic/ir/ir_emitter.h"
|
||||||
|
#include "dynarmic/ir/microinstruction.h"
|
||||||
|
#include "dynarmic/ir/opcodes.h"
|
||||||
|
#include "dynarmic/ir/opt/passes.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::Optimization {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Replaces the uses of a SHA256MessageSchedule0 instruction with generic IR.
//
// ir   - emitter whose insertion point the caller has already set; new IR is emitted through it.
// inst - the instruction being polyfilled; args 0 and 1 are read as 128-bit vectors.
//
// For each of the four 32-bit lanes of VectorExtract(arg0, arg1, 32) this computes
// ROR(e, 7) ^ ROR(e, 18) ^ LSR(e, 3), then adds arg0 lane-wise to the collected lanes.
void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
    const auto operand1 = static_cast<IR::U128>(inst.GetArg(0));
    const auto operand2 = static_cast<IR::U128>(inst.GetArg(1));

    // NOTE(review): presumably the 128-bit window starting 32 bits into operand2:operand1 - confirm against VectorExtract.
    const IR::U128 window = ir.VectorExtract(operand1, operand2, 32);

    IR::U128 sigma = ir.ZeroVector();
    for (size_t lane = 0; lane < 4; ++lane) {
        const IR::U32 word = ir.VectorGetElement(32, window, lane);
        const IR::U32 ror7 = ir.RotateRight(word, ir.Imm8(7));
        const IR::U32 ror18 = ir.RotateRight(word, ir.Imm8(18));
        const IR::U32 shr3 = ir.LogicalShiftRight(word, ir.Imm8(3));
        const IR::U32 mixed = ir.Eor(ror7, ir.Eor(ror18, shr3));

        sigma = ir.VectorSetElement(32, sigma, lane, mixed);
    }

    inst.ReplaceUsesWith(ir.VectorAdd(32, sigma, operand1));
}
|
||||||
|
|
||||||
|
// Replaces the uses of a SHA256MessageSchedule1 instruction with generic IR.
//
// ir   - emitter whose insertion point the caller has already set; new IR is emitted through it.
// inst - the instruction being polyfilled; args 0, 1 and 2 are read as 128-bit vectors.
//
// The result is built in two 64-bit halves because the upper half depends on the
// freshly computed lower half.
void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
    const auto operand1 = static_cast<IR::U128>(inst.GetArg(0));
    const auto operand2 = static_cast<IR::U128>(inst.GetArg(1));
    const auto operand3 = static_cast<IR::U128>(inst.GetArg(2));

    // Per 32-bit lane: ROR(v, 17) ^ ROR(v, 19) ^ LSR(v, 10).
    const auto lane_sigma = [&ir](const IR::U128& v) -> IR::U128 {
        const IR::U128 ror17 = ir.VectorRotateRight(32, v, 17);
        const IR::U128 ror19 = ir.VectorRotateRight(32, v, 19);
        const IR::U128 shr10 = ir.VectorLogicalShiftRight(32, v, 10);
        return ir.VectorEor(ror17, ir.VectorEor(ror19, shr10));
    };

    const IR::U128 extracted = ir.VectorExtract(operand2, operand3, 32);

    // Lower half: sigma of the top two words of operand3 (moved down by the shuffle),
    // plus operand1 and the extracted window.
    const IR::U128 swapped_operand3 = ir.VectorShuffleWords(operand3, 0b01001110);
    const IR::U128 low_sigma = lane_sigma(swapped_operand3);
    const IR::U128 low_sum = ir.VectorAdd(32, low_sigma, ir.VectorAdd(32, operand1, extracted));
    // NOTE(review): VectorZeroUpper presumably clears the upper 64 bits - confirm.
    const IR::U128 low_half = ir.VectorZeroUpper(low_sum);

    // Upper half: the same sigma, but fed with the lower half computed above.
    const IR::U128 high_sigma = lane_sigma(low_half);

    // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2]
    const IR::U128 swapped_operand1 = ir.VectorShuffleWords(operand1, 0b01001110);
    const IR::U128 swapped_extracted = ir.VectorShuffleWords(extracted, 0b01001110);

    const IR::U128 high_sum = ir.VectorAdd(32, high_sigma, ir.VectorAdd(32, swapped_operand1, swapped_extracted));
    const IR::U64 high_half = ir.VectorGetElement(64, high_sum, 0);

    inst.ReplaceUsesWith(ir.VectorSetElement(64, low_half, 1, high_half));
}
|
||||||
|
|
||||||
|
// Emits IR computing ((y ^ z) & x) ^ z, i.e. a bitwise select:
// each result bit comes from y where x is set and from z where x is clear.
IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
    const auto differing_bits = ir.Eor(y, z);
    const auto selected_diff = ir.And(differing_bits, x);
    return ir.Eor(selected_diff, z);
}
|
||||||
|
|
||||||
|
// Emits IR computing (x & y) | ((x | y) & z): each result bit is set when at
// least two of the corresponding bits of x, y and z are set.
IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
    // Kept as one nested expression so the relative emission order of the two
    // independent operands is left to the compiler exactly as before.
    return ir.Or(
        ir.And(x, y),
        ir.And(ir.Or(x, y), z));
}
|
||||||
|
|
||||||
|
// Emits IR computing ROR(x, 2) ^ ROR(x, 13) ^ ROR(x, 22).
IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 ror2 = ir.RotateRight(x, ir.Imm8(2));
    const IR::U32 ror13 = ir.RotateRight(x, ir.Imm8(13));
    const IR::U32 ror22 = ir.RotateRight(x, ir.Imm8(22));

    const auto ror13_xor_ror22 = ir.Eor(ror13, ror22);
    return ir.Eor(ror2, ror13_xor_ror22);
}
|
||||||
|
|
||||||
|
// Emits IR computing ROR(x, 6) ^ ROR(x, 11) ^ ROR(x, 25).
IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
    const IR::U32 ror6 = ir.RotateRight(x, ir.Imm8(6));
    const IR::U32 ror11 = ir.RotateRight(x, ir.Imm8(11));
    const IR::U32 ror25 = ir.RotateRight(x, ir.Imm8(25));

    const auto ror11_xor_ror25 = ir.Eor(ror11, ror25);
    return ir.Eor(ror6, ror11_xor_ror25);
}
|
||||||
|
|
||||||
|
// Replaces the uses of a SHA256Hash instruction with four unrolled rounds of generic IR.
//
// ir   - emitter whose insertion point the caller has already set; new IR is emitted through it.
// inst - the instruction being polyfilled. Args 0, 1 and 2 are read as 128-bit vectors
//        (two state vectors and the per-round words); arg 3 is a compile-time U1 flag that
//        selects which state vector becomes the result.
void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
    IR::U128 state_x = static_cast<IR::U128>(inst.GetArg(0));
    IR::U128 state_y = static_cast<IR::U128>(inst.GetArg(1));
    const IR::U128 round_words = static_cast<IR::U128>(inst.GetArg(2));
    const bool part1 = inst.GetArg(3).GetU1();

    for (size_t round = 0; round < 4; ++round) {
        const IR::U32 x0 = ir.VectorGetElement(32, state_x, 0);
        const IR::U32 x1 = ir.VectorGetElement(32, state_x, 1);
        const IR::U32 x2 = ir.VectorGetElement(32, state_x, 2);
        const IR::U32 x3 = ir.VectorGetElement(32, state_x, 3);

        const IR::U32 y0 = ir.VectorGetElement(32, state_y, 0);
        const IR::U32 y1 = ir.VectorGetElement(32, state_y, 1);
        const IR::U32 y2 = ir.VectorGetElement(32, state_y, 2);
        const IR::U32 y3 = ir.VectorGetElement(32, state_y, 3);

        const IR::U32 chosen = SHAchoose(ir, y0, y1, y2);
        const IR::U32 maj = SHAmajority(ir, x0, x1, x2);

        // t = y3 + SIGMA1(y0) + choose + W[round]
        const IR::U32 round_word = ir.VectorGetElement(32, round_words, round);
        const IR::U32 sigma1 = SHAhashSIGMA1(ir, y0);
        const IR::U32 t = ir.Add(y3, ir.Add(sigma1, ir.Add(chosen, round_word)));

        const IR::U32 next_x0 = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, x0), maj));
        const IR::U32 next_y0 = ir.Add(t, x3);

        // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3]
        const IR::U128 rotated_x = ir.VectorShuffleWords(state_x, 0b10010011);
        const IR::U128 rotated_y = ir.VectorShuffleWords(state_y, 0b10010011);

        // The word that wrapped around into lane 0 is replaced by the new value.
        state_x = ir.VectorSetElement(32, rotated_x, 0, next_x0);
        state_y = ir.VectorSetElement(32, rotated_y, 0, next_y0);
    }

    if (part1) {
        inst.ReplaceUsesWith(state_x);
    } else {
        inst.ReplaceUsesWith(state_y);
    }
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Rewrites instructions in `block` that the selected options ask to polyfill.
//
// block    - the IR block to transform in place.
// polyfill - which opcode groups to rewrite; if it equals a default-constructed
//            PolyfillOptions the block is left untouched.
//
// New IR is inserted at the position of each instruction being replaced, and the
// uses of that instruction are redirected to the newly emitted result.
void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
    const PolyfillOptions nothing_requested{};
    if (polyfill == nothing_requested) {
        return;
    }

    IR::IREmitter ir{block};

    for (auto& inst : block) {
        ir.SetInsertionPoint(&inst);

        const IR::Opcode opcode = inst.GetOpcode();
        if (opcode == IR::Opcode::SHA256MessageSchedule0) {
            if (polyfill.sha256) {
                PolyfillSHA256MessageSchedule0(ir, inst);
            }
        } else if (opcode == IR::Opcode::SHA256MessageSchedule1) {
            if (polyfill.sha256) {
                PolyfillSHA256MessageSchedule1(ir, inst);
            }
        } else if (opcode == IR::Opcode::SHA256Hash) {
            if (polyfill.sha256) {
                PolyfillSHA256Hash(ir, inst);
            }
        }
        // Every other opcode is left as it is.
    }
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::Optimization
|
Loading…
Reference in a new issue