Implement SADD8, SADD16, SSUB8, SSUB16, USUB16
This commit is contained in:
parent
3f6ecfe245
commit
8919265d2c
6 changed files with 398 additions and 42 deletions
|
@ -1278,6 +1278,26 @@ void EmitX64::EmitCountLeadingZeros(IR::Block&, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits code that replaces the low 16 bits of the argument with their two's
// complement negation; the high 16 bits are returned to their original
// position by the final rotate.
void EmitX64::EmitNegateLowWord(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();

    // Swap halves so the low word sits in bits 16..31, where the +1 of the
    // negation cannot carry into the other halfword.
    code->ror(result, 16);
    // Two's complement of the upper halfword: invert it, then add one to it.
    // `xor_` is used (not `xor`) because `xor` is a C++ alternative operator
    // token; this also matches the and_/or_/xor_/not_ calls in this file.
    code->xor_(result, 0xFFFF0000);
    code->add(result, 0x00010000);
    // Swap the halves back into their original positions.
    code->ror(result, 16);
}
|
||||||
|
|
||||||
|
// Emits code that replaces the high 16 bits of the argument with their two's
// complement negation, leaving the low 16 bits untouched.
void EmitX64::EmitNegateHighWord(IR::Block&, IR::Inst* inst) {
    IR::Value a = inst->GetArg(0);

    Xbyak::Reg32 result = reg_alloc.UseDefGpr(a, inst).cvt32();

    // Invert the upper halfword, then add one to it. The addend has no bits
    // below bit 16, so the lower halfword is unaffected.
    // `xor_` is used (not `xor`) because `xor` is a C++ alternative operator
    // token; this also matches the and_/or_/xor_/not_ calls in this file.
    code->xor_(result, 0xFFFF0000);
    code->add(result, 0x00010000);
}
|
||||||
|
|
||||||
void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) {
|
void EmitX64::EmitSignedSaturatedAdd(IR::Block& block, IR::Inst* inst) {
|
||||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||||
|
|
||||||
|
@ -1328,6 +1348,25 @@ void EmitX64::EmitSignedSaturatedSub(IR::Block& block, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits code that reduces `value` in place to the top bit of each of its four
// bytes (bits 7, 15, 23 and 31), packed into bits 0..3.
//
// On hosts reporting BMI2 a single pext is used; it needs a register holding
// the mask, taken from `a_tmp` when supplied and otherwise freshly allocated
// as a scratch register. Other hosts use a mask/multiply/shift sequence.
static void ExtractMostSignificantBitFromPackedBytes(const Xbyak::util::Cpu& cpu_info, BlockOfCode* code, RegAlloc& reg_alloc, Xbyak::Reg32 value, boost::optional<Xbyak::Reg32> a_tmp = boost::none) {
    if (!cpu_info.has(Xbyak::util::Cpu::tBMI2)) {
        // Keep only the four byte sign bits, then multiply so that bits
        // 7/15/23/31 land on bits 28/29/30/31 (the partial products do not
        // overlap), and finally shift that nibble down to bits 0..3.
        code->and_(value, 0x80808080);
        code->imul(value, value, 0x00204081);
        code->shr(value, 28);
        return;
    }

    Xbyak::Reg32 mask_reg;
    if (a_tmp) {
        mask_reg = *a_tmp;
    } else {
        mask_reg = reg_alloc.ScratchGpr().cvt32();
    }
    code->mov(mask_reg, 0x80808080);
    code->pext(value, value, mask_reg);
}
|
||||||
|
|
||||||
|
// Emits code that reduces `value` in place to a 4-bit pattern in which bits
// 0..1 both carry the top bit of the low halfword (bit 15) and bits 2..3 both
// carry the top bit of the high halfword (bit 31).
static void ExtractAndDuplicateMostSignificantBitFromPackedWords(BlockOfCode* code, Xbyak::Reg32 value) {
    constexpr unsigned int halfword_top_bits = 0x80008000;
    // Bits 0, 1, 14 and 15: copies a bit at position 14 to positions 14, 15,
    // 28 and 29, and a bit at position 30 to positions 30 and 31 (the other
    // two copies fall outside the 32-bit result).
    constexpr int duplicate_into_top_nibble = 0xC003;

    code->and_(value, halfword_top_bits);
    code->shr(value, 1); // the kept bits now sit at positions 14 and 30
    code->imul(value, value, duplicate_into_top_nibble);
    code->shr(value, 28);
}
|
||||||
|
|
||||||
void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
|
void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
|
||||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
@ -1364,14 +1403,119 @@ void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
|
||||||
}
|
}
|
||||||
code->xor_(result, reg_a);
|
code->xor_(result, reg_a);
|
||||||
if (ge_inst) {
|
if (ge_inst) {
|
||||||
if (cpu_info.has(Xbyak::util::Cpu::tBMI2)) {
|
ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge);
|
||||||
code->mov(tmp, 0x80808080);
|
|
||||||
code->pext(reg_ge, reg_ge, tmp);
|
|
||||||
} else {
|
|
||||||
code->and_(reg_ge, 0x80808080);
|
|
||||||
code->imul(reg_ge, reg_ge, 0x0204081);
|
|
||||||
code->shr(reg_ge, 28);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a packed add of four 8-bit lanes (paddb). When a GetGEFromOp
// pseudo-operation is attached to `inst`, it is erased from the block and its
// value is produced here as well: one bit per lane, set when the top bit of
// the signed-saturated sum (paddsb) is clear.
void EmitX64::EmitPackedAddS8(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const bool wants_ge = static_cast<bool>(ge_inst);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // The first operand's register doubles as the destination of `inst`.
    Xbyak::Reg32 gpr_result = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 gpr_rhs = reg_alloc.UseGpr(rhs).cvt32();
    Xbyak::Reg32 gpr_ge;

    Xbyak::Xmm vec_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm vec_rhs = reg_alloc.ScratchXmm();

    if (wants_ge) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        gpr_ge = reg_alloc.DefGpr(ge_inst).cvt32();
    }

    code->movd(vec_lhs, gpr_result);
    code->movd(vec_rhs, gpr_rhs);

    if (wants_ge) {
        // Saturating sum, computed on a copy so vec_lhs stays intact.
        Xbyak::Xmm vec_saturated = reg_alloc.ScratchXmm();
        code->movdqa(vec_saturated, vec_lhs);
        code->paddsb(vec_saturated, vec_rhs);
        code->movd(gpr_ge, vec_saturated);
    }

    code->paddb(vec_lhs, vec_rhs);
    code->movd(gpr_result, vec_lhs);

    if (wants_ge) {
        // Invert so a set top bit marks lanes whose saturated sum had a
        // clear sign bit, then compress those four bits.
        code->not_(gpr_ge);
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, gpr_ge);
    }
}
|
||||||
|
|
||||||
|
// Emits a packed add of two 16-bit lanes in general-purpose registers, with
// no SIMD instructions. When a GetGEFromOp pseudo-operation is attached to
// `inst`, it is erased from the block and its value is produced here from the
// per-lane carry-out, duplicated to two bits per lane.
void EmitX64::EmitPackedAddU16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const bool wants_ge = static_cast<bool>(ge_inst);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // Both operands are clobbered below, hence the scratch copies.
    Xbyak::Reg32 gpr_lhs = reg_alloc.UseScratchGpr(lhs).cvt32();
    Xbyak::Reg32 gpr_rhs = reg_alloc.UseScratchGpr(rhs).cvt32();
    Xbyak::Reg32 gpr_sum = reg_alloc.DefGpr(inst).cvt32();
    Xbyak::Reg32 gpr_ge;

    if (wants_ge) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        gpr_ge = reg_alloc.DefGpr(ge_inst).cvt32();

        // First carry-out term: both operands have the lane's top bit set.
        code->mov(gpr_ge, gpr_lhs);
        code->and_(gpr_ge, gpr_rhs);
    }

    // Add the low 15 bits of each lane separately from the top bits, so a
    // carry out of the low lane never reaches the high lane.
    code->mov(gpr_sum, gpr_lhs);
    code->xor_(gpr_sum, gpr_rhs);
    code->and_(gpr_sum, 0x80008000);   // top bits of lhs ^ rhs
    code->and_(gpr_lhs, 0x7FFF7FFF);
    code->and_(gpr_rhs, 0x7FFF7FFF);
    code->add(gpr_lhs, gpr_rhs);       // bits 15/31 now hold the carry into the top bit

    if (wants_ge) {
        // Second carry-out term: exactly one top bit set plus a carry into it.
        Xbyak::Reg32 gpr_carry = reg_alloc.ScratchGpr().cvt32();
        code->mov(gpr_carry, gpr_sum);
        code->and_(gpr_carry, gpr_lhs);
        code->or_(gpr_ge, gpr_carry);
    }

    // Fold the carry-in into the top bits to complete the per-lane sums.
    code->xor_(gpr_sum, gpr_lhs);

    if (wants_ge) {
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, gpr_ge);
    }
}
|
||||||
|
|
||||||
|
// Emits a packed add of two 16-bit lanes (paddw). When a GetGEFromOp
// pseudo-operation is attached to `inst`, it is erased from the block and its
// value is produced here as well: two bits per lane, set when the top bit of
// the signed-saturated sum (paddsw) is clear.
void EmitX64::EmitPackedAddS16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const bool wants_ge = static_cast<bool>(ge_inst);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // The first operand's register doubles as the destination of `inst`.
    Xbyak::Reg32 gpr_result = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 gpr_rhs = reg_alloc.UseGpr(rhs).cvt32();
    Xbyak::Reg32 gpr_ge;

    Xbyak::Xmm vec_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm vec_rhs = reg_alloc.ScratchXmm();

    if (wants_ge) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        gpr_ge = reg_alloc.DefGpr(ge_inst).cvt32();
    }

    code->movd(vec_lhs, gpr_result);
    code->movd(vec_rhs, gpr_rhs);

    if (wants_ge) {
        // Saturating sum, computed on a copy so vec_lhs stays intact.
        Xbyak::Xmm vec_saturated = reg_alloc.ScratchXmm();
        code->movdqa(vec_saturated, vec_lhs);
        code->paddsw(vec_saturated, vec_rhs);
        code->movd(gpr_ge, vec_saturated);
    }

    code->paddw(vec_lhs, vec_rhs);
    code->movd(gpr_result, vec_lhs);

    if (wants_ge) {
        // Invert so a set top bit marks lanes whose saturated sum had a
        // clear sign bit, then compress and duplicate those bits.
        code->not_(gpr_ge);
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, gpr_ge);
    }
}
|
||||||
|
|
||||||
|
@ -1409,15 +1553,115 @@ void EmitX64::EmitPackedSubU8(IR::Block& block, IR::Inst* inst) {
|
||||||
code->movd(reg_a, xmm_a);
|
code->movd(reg_a, xmm_a);
|
||||||
|
|
||||||
if (ge_inst) {
|
if (ge_inst) {
|
||||||
if (cpu_info.has(Xbyak::util::Cpu::tBMI2)) {
|
ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, reg_ge);
|
||||||
Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();
|
|
||||||
code->mov(tmp, 0x80808080);
|
|
||||||
code->pext(reg_ge, reg_ge, tmp);
|
|
||||||
} else {
|
|
||||||
code->and_(reg_ge, 0x80808080);
|
|
||||||
code->imul(reg_ge, reg_ge, 0x0204081);
|
|
||||||
code->shr(reg_ge, 28);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Emits a packed subtract of four 8-bit lanes (psubb). When a GetGEFromOp
// pseudo-operation is attached to `inst`, it is erased from the block and its
// value is produced here as well: one bit per lane, set when the top bit of
// the signed-saturated difference (psubsb) is clear.
void EmitX64::EmitPackedSubS8(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const bool wants_ge = static_cast<bool>(ge_inst);

    IR::Value minuend = inst->GetArg(0);
    IR::Value subtrahend = inst->GetArg(1);

    // The first operand's register doubles as the destination of `inst`.
    Xbyak::Reg32 gpr_result = reg_alloc.UseDefGpr(minuend, inst).cvt32();
    Xbyak::Reg32 gpr_rhs = reg_alloc.UseGpr(subtrahend).cvt32();
    Xbyak::Reg32 gpr_ge;

    Xbyak::Xmm vec_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm vec_rhs = reg_alloc.ScratchXmm();

    if (wants_ge) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();
        gpr_ge = reg_alloc.DefGpr(ge_inst).cvt32();
    }

    code->movd(vec_rhs, gpr_rhs);
    code->movd(vec_lhs, gpr_result);

    if (wants_ge) {
        // Saturating difference, computed on a copy so vec_lhs stays intact.
        Xbyak::Xmm vec_saturated = reg_alloc.ScratchXmm();
        code->movdqa(vec_saturated, vec_lhs);
        code->psubsb(vec_saturated, vec_rhs);
        code->movd(gpr_ge, vec_saturated);
    }

    code->psubb(vec_lhs, vec_rhs);
    code->movd(gpr_result, vec_lhs);

    if (wants_ge) {
        // Invert so a set top bit marks lanes whose saturated difference had
        // a clear sign bit, then compress those four bits.
        code->not_(gpr_ge);
        ExtractMostSignificantBitFromPackedBytes(cpu_info, code, reg_alloc, gpr_ge);
    }
}
|
||||||
|
|
||||||
|
// Emits a packed subtract of two 16-bit lanes (psubw). When a GetGEFromOp
// pseudo-operation is attached to `inst`, it is erased from the block and its
// value is produced here as well: two bits per lane, set when the first
// operand's lane is unsigned-greater-or-equal to the second's.
//
// pmaxuw is an SSE4.1 instruction, so it is only emitted when the host
// reports SSE4.1; other hosts get an SSE2-only sequence that yields the same
// per-lane mask.
void EmitX64::EmitPackedSubU16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value a = inst->GetArg(0);
    IR::Value b = inst->GetArg(1);

    Xbyak::Reg32 reg_a = reg_alloc.UseDefGpr(a, inst).cvt32();
    Xbyak::Reg32 reg_b = reg_alloc.UseGpr(b).cvt32();
    Xbyak::Reg32 reg_ge;

    Xbyak::Xmm xmm_a = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_b = reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_ge;

    const bool has_sse41 = cpu_info.has(Xbyak::util::Cpu::tSSE41);

    if (ge_inst) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        reg_ge = reg_alloc.DefGpr(ge_inst).cvt32();
        xmm_ge = reg_alloc.ScratchXmm();
    }

    code->movd(xmm_a, reg_a);
    code->movd(xmm_b, reg_b);
    if (ge_inst) {
        if (has_sse41) {
            // max(a, b) == a  <=>  a >= b (unsigned), giving 0xFFFF per lane.
            code->movdqa(xmm_ge, xmm_a);
            code->pmaxuw(xmm_ge, xmm_b);
            code->pcmpeqw(xmm_ge, xmm_a);
        } else {
            // Unsigned-saturating b - a is zero exactly when a >= b. The
            // comparison against zero is done after the subtraction below,
            // once xmm_b is no longer needed.
            code->movdqa(xmm_ge, xmm_b);
            code->psubusw(xmm_ge, xmm_a);
        }
    }
    code->psubw(xmm_a, xmm_b);
    code->movd(reg_a, xmm_a);
    if (ge_inst) {
        if (!has_sse41) {
            // xmm_b is a dead scratch register now; reuse it as zero.
            code->pxor(xmm_b, xmm_b);
            code->pcmpeqw(xmm_ge, xmm_b);
        }
        code->movd(reg_ge, xmm_ge);
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, reg_ge);
    }
}
|
||||||
|
|
||||||
|
// Emits a packed subtract of two 16-bit lanes (psubw). When a GetGEFromOp
// pseudo-operation is attached to `inst`, it is erased from the block and its
// value is produced here as well: two bits per lane, set when the top bit of
// the signed-saturated difference (psubsw) is clear.
void EmitX64::EmitPackedSubS16(IR::Block& block, IR::Inst* inst) {
    auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
    const bool wants_ge = static_cast<bool>(ge_inst);

    IR::Value minuend = inst->GetArg(0);
    IR::Value subtrahend = inst->GetArg(1);

    // The first operand's register doubles as the destination of `inst`.
    Xbyak::Reg32 gpr_result = reg_alloc.UseDefGpr(minuend, inst).cvt32();
    Xbyak::Reg32 gpr_rhs = reg_alloc.UseGpr(subtrahend).cvt32();
    Xbyak::Reg32 gpr_ge;

    Xbyak::Xmm vec_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm vec_rhs = reg_alloc.ScratchXmm();

    if (wants_ge) {
        EraseInstruction(block, ge_inst);
        inst->DecrementRemainingUses();

        gpr_ge = reg_alloc.DefGpr(ge_inst).cvt32();
    }

    code->movd(vec_rhs, gpr_rhs);
    code->movd(vec_lhs, gpr_result);

    if (wants_ge) {
        // Saturating difference, computed on a copy so vec_lhs stays intact.
        Xbyak::Xmm vec_saturated = reg_alloc.ScratchXmm();
        code->movdqa(vec_saturated, vec_lhs);
        code->psubsw(vec_saturated, vec_rhs);
        code->movd(gpr_ge, vec_saturated);
    }

    code->psubw(vec_lhs, vec_rhs);
    code->movd(gpr_result, vec_lhs);

    if (wants_ge) {
        // Invert so a set top bit marks lanes whose saturated difference had
        // a clear sign bit, then compress and duplicate those bits.
        code->not_(gpr_ge);
        ExtractAndDuplicateMostSignificantBitFromPackedWords(code, gpr_ge);
    }
}
|
||||||
|
|
||||||
|
|
|
@ -328,6 +328,14 @@ Value IREmitter::CountLeadingZeros(const Value& a) {
|
||||||
return Inst(Opcode::CountLeadingZeros, {a});
|
return Inst(Opcode::CountLeadingZeros, {a});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends a NegateLowWord IR instruction taking `a` as its only argument and
// returns the value it produces.
Value IREmitter::NegateLowWord(const Value& a) {
    Value negated = Inst(Opcode::NegateLowWord, {a});
    return negated;
}
|
||||||
|
|
||||||
|
// Appends a NegateHighWord IR instruction taking `a` as its only argument and
// returns the value it produces.
Value IREmitter::NegateHighWord(const Value& a) {
    Value negated = Inst(Opcode::NegateHighWord, {a});
    return negated;
}
|
||||||
|
|
||||||
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedAdd(const Value& a, const Value& b) {
|
IREmitter::ResultAndOverflow IREmitter::SignedSaturatedAdd(const Value& a, const Value& b) {
|
||||||
auto result = Inst(Opcode::SignedSaturatedAdd, {a, b});
|
auto result = Inst(Opcode::SignedSaturatedAdd, {a, b});
|
||||||
auto overflow = Inst(Opcode::GetOverflowFromOp, {result});
|
auto overflow = Inst(Opcode::GetOverflowFromOp, {result});
|
||||||
|
@ -346,12 +354,48 @@ IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
|
||||||
return {result, ge};
|
return {result, ge};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends a PackedAddS8 instruction on (a, b) followed by a GetGEFromOp
// pseudo-operation that refers to it; returns both values as a pair.
IREmitter::ResultAndGE IREmitter::PackedAddS8(const Value& a, const Value& b) {
    auto packed_sum = Inst(Opcode::PackedAddS8, {a, b});
    auto ge_flags = Inst(Opcode::GetGEFromOp, {packed_sum});
    return {packed_sum, ge_flags};
}
|
||||||
|
|
||||||
|
// Appends a PackedAddU16 instruction on (a, b) followed by a GetGEFromOp
// pseudo-operation that refers to it; returns both values as a pair.
IREmitter::ResultAndGE IREmitter::PackedAddU16(const Value& a, const Value& b) {
    auto packed_sum = Inst(Opcode::PackedAddU16, {a, b});
    auto ge_flags = Inst(Opcode::GetGEFromOp, {packed_sum});
    return {packed_sum, ge_flags};
}
|
||||||
|
|
||||||
|
// Appends a PackedAddS16 instruction on (a, b) followed by a GetGEFromOp
// pseudo-operation that refers to it; returns both values as a pair.
IREmitter::ResultAndGE IREmitter::PackedAddS16(const Value& a, const Value& b) {
    auto packed_sum = Inst(Opcode::PackedAddS16, {a, b});
    auto ge_flags = Inst(Opcode::GetGEFromOp, {packed_sum});
    return {packed_sum, ge_flags};
}
|
||||||
|
|
||||||
// Appends a PackedSubU8 instruction on (a, b) followed by a GetGEFromOp
// pseudo-operation that refers to it; returns both values as a pair.
IREmitter::ResultAndGE IREmitter::PackedSubU8(const Value& a, const Value& b) {
    auto packed_difference = Inst(Opcode::PackedSubU8, {a, b});
    auto ge_flags = Inst(Opcode::GetGEFromOp, {packed_difference});
    return {packed_difference, ge_flags};
}
|
||||||
|
|
||||||
|
// Appends a PackedSubS8 instruction on (a, b) followed by a GetGEFromOp
// pseudo-operation that refers to it; returns both values as a pair.
IREmitter::ResultAndGE IREmitter::PackedSubS8(const Value& a, const Value& b) {
    auto packed_difference = Inst(Opcode::PackedSubS8, {a, b});
    auto ge_flags = Inst(Opcode::GetGEFromOp, {packed_difference});
    return {packed_difference, ge_flags};
}
|
||||||
|
|
||||||
|
// Appends a PackedSubU16 instruction on (a, b) followed by a GetGEFromOp
// pseudo-operation that refers to it; returns both values as a pair.
IREmitter::ResultAndGE IREmitter::PackedSubU16(const Value& a, const Value& b) {
    auto packed_difference = Inst(Opcode::PackedSubU16, {a, b});
    auto ge_flags = Inst(Opcode::GetGEFromOp, {packed_difference});
    return {packed_difference, ge_flags};
}
|
||||||
|
|
||||||
|
// Appends a PackedSubS16 instruction on (a, b) followed by a GetGEFromOp
// pseudo-operation that refers to it; returns both values as a pair.
IREmitter::ResultAndGE IREmitter::PackedSubS16(const Value& a, const Value& b) {
    auto packed_difference = Inst(Opcode::PackedSubS16, {a, b});
    auto ge_flags = Inst(Opcode::GetGEFromOp, {packed_difference});
    return {packed_difference, ge_flags};
}
|
||||||
|
|
||||||
Value IREmitter::PackedHalvingAddU8(const Value& a, const Value& b) {
|
Value IREmitter::PackedHalvingAddU8(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedHalvingAddU8, {a, b});
|
return Inst(Opcode::PackedHalvingAddU8, {a, b});
|
||||||
}
|
}
|
||||||
|
|
|
@ -133,12 +133,20 @@ public:
|
||||||
Value ByteReverseHalf(const Value& a);
|
Value ByteReverseHalf(const Value& a);
|
||||||
Value ByteReverseDual(const Value& a);
|
Value ByteReverseDual(const Value& a);
|
||||||
Value CountLeadingZeros(const Value& a);
|
Value CountLeadingZeros(const Value& a);
|
||||||
|
Value NegateLowWord(const Value& a);
|
||||||
|
Value NegateHighWord(const Value& a);
|
||||||
|
|
||||||
ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
|
ResultAndOverflow SignedSaturatedAdd(const Value& a, const Value& b);
|
||||||
ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
|
ResultAndOverflow SignedSaturatedSub(const Value& a, const Value& b);
|
||||||
|
|
||||||
ResultAndGE PackedAddU8(const Value& a, const Value& b);
|
ResultAndGE PackedAddU8(const Value& a, const Value& b);
|
||||||
|
ResultAndGE PackedAddS8(const Value& a, const Value& b);
|
||||||
|
ResultAndGE PackedAddU16(const Value& a, const Value& b);
|
||||||
|
ResultAndGE PackedAddS16(const Value& a, const Value& b);
|
||||||
ResultAndGE PackedSubU8(const Value& a, const Value& b);
|
ResultAndGE PackedSubU8(const Value& a, const Value& b);
|
||||||
|
ResultAndGE PackedSubS8(const Value& a, const Value& b);
|
||||||
|
ResultAndGE PackedSubU16(const Value& a, const Value& b);
|
||||||
|
ResultAndGE PackedSubS16(const Value& a, const Value& b);
|
||||||
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
||||||
Value PackedHalvingAddS8(const Value& a, const Value& b);
|
Value PackedHalvingAddS8(const Value& a, const Value& b);
|
||||||
Value PackedHalvingSubU8(const Value& a, const Value& b);
|
Value PackedHalvingSubU8(const Value& a, const Value& b);
|
||||||
|
|
|
@ -73,6 +73,8 @@ OPCODE(ByteReverseWord, T::U32, T::U32
|
||||||
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
||||||
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
||||||
OPCODE(CountLeadingZeros, T::U32, T::U32 )
|
OPCODE(CountLeadingZeros, T::U32, T::U32 )
|
||||||
|
OPCODE(NegateLowWord, T::U32, T::U32 )
|
||||||
|
OPCODE(NegateHighWord, T::U32, T::U32 )
|
||||||
|
|
||||||
// Saturated instructions
|
// Saturated instructions
|
||||||
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
|
||||||
|
@ -80,7 +82,13 @@ OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32
|
||||||
|
|
||||||
// Packed instructions
|
// Packed instructions
|
||||||
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedAddS8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedSubS8, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedAddU16, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedAddS16, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedSubU16, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedSubS16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 )
|
||||||
|
|
|
@ -11,13 +11,25 @@ namespace Arm {
|
||||||
|
|
||||||
// Parallel Add/Subtract (Modulo arithmetic) instructions
|
// Parallel Add/Subtract (Modulo arithmetic) instructions
|
||||||
// Translates SADD8: Rd receives the PackedAddS8 of Rn and Rm, and the GE
// flags are set from the same operation.
bool ArmTranslatorVisitor::arm_SADD8(Cond cond, Reg n, Reg d, Reg m) {
    // Any operand being the PC is handled as an unpredictable instruction.
    const bool uses_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    auto packed = ir.PackedAddS8(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, packed.result);
    ir.SetGEFlags(packed.ge);
    return true;
}
|
||||||
|
|
||||||
// Translates SADD16: Rd receives the PackedAddS16 of Rn and Rm, and the GE
// flags are set from the same operation.
bool ArmTranslatorVisitor::arm_SADD16(Cond cond, Reg n, Reg d, Reg m) {
    // Any operand being the PC is handled as an unpredictable instruction.
    const bool uses_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    auto packed = ir.PackedAddS16(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, packed.result);
    ir.SetGEFlags(packed.ge);
    return true;
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_SASX(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_SASX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
|
@ -31,13 +43,25 @@ bool ArmTranslatorVisitor::arm_SSAX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translates SSUB8: Rd receives the PackedSubS8 of Rn and Rm, and the GE
// flags are set from the same operation.
bool ArmTranslatorVisitor::arm_SSUB8(Cond cond, Reg n, Reg d, Reg m) {
    // Any operand being the PC is handled as an unpredictable instruction.
    const bool uses_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    auto packed = ir.PackedSubS8(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, packed.result);
    ir.SetGEFlags(packed.ge);
    return true;
}
|
||||||
|
|
||||||
// Translates SSUB16: Rd receives the PackedSubS16 of Rn and Rm, and the GE
// flags are set from the same operation.
bool ArmTranslatorVisitor::arm_SSUB16(Cond cond, Reg n, Reg d, Reg m) {
    // Any operand being the PC is handled as an unpredictable instruction.
    const bool uses_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    auto packed = ir.PackedSubS16(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, packed.result);
    ir.SetGEFlags(packed.ge);
    return true;
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_UADD8(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UADD8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
|
@ -52,8 +76,14 @@ bool ArmTranslatorVisitor::arm_UADD8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translates UADD16: Rd receives the PackedAddU16 of Rn and Rm, and the GE
// flags are set from the same operation.
bool ArmTranslatorVisitor::arm_UADD16(Cond cond, Reg n, Reg d, Reg m) {
    // Any operand being the PC is handled as an unpredictable instruction.
    const bool uses_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    auto packed = ir.PackedAddU16(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, packed.result);
    ir.SetGEFlags(packed.ge);
    return true;
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_UASX(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UASX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
|
@ -99,11 +129,16 @@ bool ArmTranslatorVisitor::arm_USUB8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translates USUB16: Rd receives the PackedSubU16 of Rn and Rm, and the GE
// flags are set from the same operation.
bool ArmTranslatorVisitor::arm_USUB16(Cond cond, Reg n, Reg d, Reg m) {
    // Any operand being the PC is handled as an unpredictable instruction.
    const bool uses_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    auto packed = ir.PackedSubU16(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, packed.result);
    ir.SetGEFlags(packed.ge);
    return true;
}
|
||||||
|
|
||||||
|
|
||||||
// Parallel Add/Subtract (Saturating) instructions
|
// Parallel Add/Subtract (Saturating) instructions
|
||||||
bool ArmTranslatorVisitor::arm_QADD8(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_QADD8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
|
@ -201,7 +236,6 @@ bool ArmTranslatorVisitor::arm_UQSUB16(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Parallel Add/Subtract (Halving) instructions
|
// Parallel Add/Subtract (Halving) instructions
|
||||||
bool ArmTranslatorVisitor::arm_SHADD8(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_SHADD8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
|
|
|
@ -895,27 +895,33 @@ TEST_CASE("Fuzz ARM multiply instructions", "[JitX64]") {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Fuzz ARM parallel instructions", "[JitX64]") {
|
TEST_CASE("Fuzz ARM parallel instructions", "[JitX64][parallel]") {
|
||||||
const auto is_valid = [](u32 instr) -> bool {
|
const auto is_valid = [](u32 instr) -> bool {
|
||||||
// R15 as Rd, Rn, or Rm is UNPREDICTABLE
|
// R15 as Rd, Rn, or Rm is UNPREDICTABLE
|
||||||
return Bits<0, 3>(instr) != 0b1111 && Bits<12, 15>(instr) != 0b1111 && Bits<16, 19>(instr) != 0b1111;
|
return Bits<0, 3>(instr) != 0b1111 && Bits<12, 15>(instr) != 0b1111 && Bits<16, 19>(instr) != 0b1111;
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::array<InstructionGenerator, 12> modulo_instructions = {{
|
const std::array<InstructionGenerator, 4> modulo_add_instructions = {{
|
||||||
InstructionGenerator("cccc01100001nnnndddd11111001mmmm", is_valid), // SADD8
|
InstructionGenerator("cccc01100001nnnndddd11111001mmmm", is_valid), // SADD8
|
||||||
InstructionGenerator("cccc01100001nnnndddd11110001mmmm", is_valid), // SADD16
|
InstructionGenerator("cccc01100001nnnndddd11110001mmmm", is_valid), // SADD16
|
||||||
InstructionGenerator("cccc01100001nnnndddd11110011mmmm", is_valid), // SASX
|
|
||||||
InstructionGenerator("cccc01100001nnnndddd11110101mmmm", is_valid), // SSAX
|
|
||||||
InstructionGenerator("cccc01100001nnnndddd11111111mmmm", is_valid), // SSUB8
|
|
||||||
InstructionGenerator("cccc01100001nnnndddd11110111mmmm", is_valid), // SSUB16
|
|
||||||
InstructionGenerator("cccc01100101nnnndddd11111001mmmm", is_valid), // UADD8
|
InstructionGenerator("cccc01100101nnnndddd11111001mmmm", is_valid), // UADD8
|
||||||
InstructionGenerator("cccc01100101nnnndddd11110001mmmm", is_valid), // UADD16
|
InstructionGenerator("cccc01100101nnnndddd11110001mmmm", is_valid), // UADD16
|
||||||
InstructionGenerator("cccc01100101nnnndddd11110011mmmm", is_valid), // UASX
|
}};
|
||||||
InstructionGenerator("cccc01100101nnnndddd11110101mmmm", is_valid), // USAX
|
|
||||||
|
const std::array<InstructionGenerator, 4> modulo_sub_instructions = {{
|
||||||
|
InstructionGenerator("cccc01100001nnnndddd11111111mmmm", is_valid), // SSUB8
|
||||||
|
InstructionGenerator("cccc01100001nnnndddd11110111mmmm", is_valid), // SSUB16
|
||||||
InstructionGenerator("cccc01100101nnnndddd11111111mmmm", is_valid), // USUB8
|
InstructionGenerator("cccc01100101nnnndddd11111111mmmm", is_valid), // USUB8
|
||||||
InstructionGenerator("cccc01100101nnnndddd11110111mmmm", is_valid), // USUB16
|
InstructionGenerator("cccc01100101nnnndddd11110111mmmm", is_valid), // USUB16
|
||||||
}};
|
}};
|
||||||
|
|
||||||
|
const std::array<InstructionGenerator, 4> modulo_exchange_instructions = {{
|
||||||
|
InstructionGenerator("cccc01100001nnnndddd11110011mmmm", is_valid), // SASX
|
||||||
|
InstructionGenerator("cccc01100001nnnndddd11110101mmmm", is_valid), // SSAX
|
||||||
|
InstructionGenerator("cccc01100101nnnndddd11110011mmmm", is_valid), // UASX
|
||||||
|
InstructionGenerator("cccc01100101nnnndddd11110101mmmm", is_valid), // USAX
|
||||||
|
}};
|
||||||
|
|
||||||
const std::array<InstructionGenerator, 8> saturating_instructions = {{
|
const std::array<InstructionGenerator, 8> saturating_instructions = {{
|
||||||
InstructionGenerator("cccc01100010nnnndddd11111001mmmm", is_valid), // QADD8
|
InstructionGenerator("cccc01100010nnnndddd11111001mmmm", is_valid), // QADD8
|
||||||
InstructionGenerator("cccc01100010nnnndddd11111111mmmm", is_valid), // QSUB8
|
InstructionGenerator("cccc01100010nnnndddd11111111mmmm", is_valid), // QSUB8
|
||||||
|
@ -942,9 +948,21 @@ TEST_CASE("Fuzz ARM parallel instructions", "[JitX64]") {
|
||||||
InstructionGenerator("cccc01100111nnnndddd11110111mmmm", is_valid), // UHSUB16
|
InstructionGenerator("cccc01100111nnnndddd11110111mmmm", is_valid), // UHSUB16
|
||||||
}};
|
}};
|
||||||
|
|
||||||
SECTION("Parallel Add/Subtract (Modulo)") {
|
SECTION("Parallel Add (Modulo)") {
|
||||||
FuzzJitArm(1, 1, 10000, [&modulo_instructions]() -> u32 {
|
FuzzJitArm(1, 1, 10000, [&modulo_add_instructions]() -> u32 {
|
||||||
return modulo_instructions[RandInt<size_t>(0, modulo_instructions.size() - 1)].Generate();
|
return modulo_add_instructions[RandInt<size_t>(0, modulo_add_instructions.size() - 1)].Generate();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
SECTION("Parallel Subtract (Modulo)") {
|
||||||
|
FuzzJitArm(1, 1, 10000, [&modulo_sub_instructions]() -> u32 {
|
||||||
|
return modulo_sub_instructions[RandInt<size_t>(0, modulo_sub_instructions.size() - 1)].Generate();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
SECTION("Parallel Exchange (Modulo)") {
|
||||||
|
FuzzJitArm(1, 1, 10000, [&modulo_exchange_instructions]() -> u32 {
|
||||||
|
return modulo_exchange_instructions[RandInt<size_t>(0, modulo_exchange_instructions.size() - 1)].Generate();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue