Merge pull request #284 from neobrain/pica_progress

Pica progress: Texturing, shaders, cleanups & more
This commit is contained in:
bunnei 2014-12-20 12:40:06 -05:00
commit acabd7be82
16 changed files with 763 additions and 484 deletions

3
.gitmodules vendored
View file

@ -4,3 +4,6 @@
[submodule "externals/boost"] [submodule "externals/boost"]
path = externals/boost path = externals/boost
url = https://github.com/citra-emu/ext-boost.git url = https://github.com/citra-emu/ext-boost.git
[submodule "externals/nihstro"]
path = externals/nihstro
url = https://github.com/neobrain/nihstro.git

View file

@ -141,6 +141,8 @@ set(INI_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/externals/inih")
include_directories(${INI_PREFIX}) include_directories(${INI_PREFIX})
add_subdirectory(${INI_PREFIX}) add_subdirectory(${INI_PREFIX})
include_directories(externals/nihstro/include)
# process subdirectories # process subdirectories
if(ENABLE_QT) if(ENABLE_QT)
include_directories(externals/qhexedit) include_directories(externals/qhexedit)

1
externals/nihstro vendored Submodule

@ -0,0 +1 @@
Subproject commit fc71f8684d26ccf277ad68809c8bd7273141fe89

View file

@ -39,15 +39,17 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
switch (index.column()) { switch (index.column()) {
case 0: case 0:
{ {
std::map<Pica::DebugContext::Event, QString> map; static const std::map<Pica::DebugContext::Event, QString> map = {
map.insert({Pica::DebugContext::Event::CommandLoaded, tr("Pica command loaded")}); { Pica::DebugContext::Event::CommandLoaded, tr("Pica command loaded") },
map.insert({Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed")}); { Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed") },
map.insert({Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch")}); { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
map.insert({Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch")}); { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
{ Pica::DebugContext::Event::VertexLoaded, tr("Vertex Loaded") }
};
_dbg_assert_(Debug_GPU, map.size() == static_cast<size_t>(Pica::DebugContext::Event::NumEvents)); _dbg_assert_(Debug_GPU, map.size() == static_cast<size_t>(Pica::DebugContext::Event::NumEvents));
return map[event]; return (map.find(event) != map.end()) ? map.at(event) : QString();
} }
case 1: case 1:

View file

@ -24,7 +24,7 @@ QImage LoadTexture(u8* src, const Pica::DebugUtils::TextureInfo& info) {
QImage decoded_image(info.width, info.height, QImage::Format_ARGB32); QImage decoded_image(info.width, info.height, QImage::Format_ARGB32);
for (int y = 0; y < info.height; ++y) { for (int y = 0; y < info.height; ++y) {
for (int x = 0; x < info.width; ++x) { for (int x = 0; x < info.width; ++x) {
Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(src, x, y, info); Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(src, x, y, info, true);
decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a()));
} }
} }
@ -47,7 +47,7 @@ public:
}; };
TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo& info, QWidget* parent) TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo& info, QWidget* parent)
: QDockWidget(tr("Texture 0x%1").arg(info.address, 8, 16, QLatin1Char('0'))), : QDockWidget(tr("Texture 0x%1").arg(info.physical_address, 8, 16, QLatin1Char('0'))),
info(info) { info(info) {
QWidget* main_widget = new QWidget; QWidget* main_widget = new QWidget;
@ -60,7 +60,7 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo
phys_address_spinbox->SetBase(16); phys_address_spinbox->SetBase(16);
phys_address_spinbox->SetRange(0, 0xFFFFFFFF); phys_address_spinbox->SetRange(0, 0xFFFFFFFF);
phys_address_spinbox->SetPrefix("0x"); phys_address_spinbox->SetPrefix("0x");
phys_address_spinbox->SetValue(info.address); phys_address_spinbox->SetValue(info.physical_address);
connect(phys_address_spinbox, SIGNAL(ValueChanged(qint64)), this, SLOT(OnAddressChanged(qint64))); connect(phys_address_spinbox, SIGNAL(ValueChanged(qint64)), this, SLOT(OnAddressChanged(qint64)));
QComboBox* format_choice = new QComboBox; QComboBox* format_choice = new QComboBox;
@ -69,6 +69,13 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo
format_choice->addItem(tr("RGBA5551")); format_choice->addItem(tr("RGBA5551"));
format_choice->addItem(tr("RGB565")); format_choice->addItem(tr("RGB565"));
format_choice->addItem(tr("RGBA4")); format_choice->addItem(tr("RGBA4"));
format_choice->addItem(tr("IA8"));
format_choice->addItem(tr("UNK6"));
format_choice->addItem(tr("I8"));
format_choice->addItem(tr("A8"));
format_choice->addItem(tr("IA4"));
format_choice->addItem(tr("UNK10"));
format_choice->addItem(tr("A4"));
format_choice->setCurrentIndex(static_cast<int>(info.format)); format_choice->setCurrentIndex(static_cast<int>(info.format));
connect(format_choice, SIGNAL(currentIndexChanged(int)), this, SLOT(OnFormatChanged(int))); connect(format_choice, SIGNAL(currentIndexChanged(int)), this, SLOT(OnFormatChanged(int)));
@ -125,7 +132,7 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo
} }
void TextureInfoDockWidget::OnAddressChanged(qint64 value) { void TextureInfoDockWidget::OnAddressChanged(qint64 value) {
info.address = value; info.physical_address = value;
emit UpdatePixmap(ReloadPixmap()); emit UpdatePixmap(ReloadPixmap());
} }
@ -150,7 +157,7 @@ void TextureInfoDockWidget::OnStrideChanged(int value) {
} }
QPixmap TextureInfoDockWidget::ReloadPixmap() const { QPixmap TextureInfoDockWidget::ReloadPixmap() const {
u8* src = Memory::GetPointer(info.address); u8* src = Memory::GetPointer(Pica::PAddrToVAddr(info.physical_address));
return QPixmap::fromImage(LoadTexture(src, info)); return QPixmap::fromImage(LoadTexture(src, info));
} }
@ -223,9 +230,21 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace&
void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt(); const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt();
if (COMMAND_IN_RANGE(command_id, texture0) ||
COMMAND_IN_RANGE(command_id, texture1) ||
COMMAND_IN_RANGE(command_id, texture2)) {
unsigned index;
if (COMMAND_IN_RANGE(command_id, texture0)) { if (COMMAND_IN_RANGE(command_id, texture0)) {
auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(Pica::registers.texture0, index = 0;
Pica::registers.texture0_format); } else if (COMMAND_IN_RANGE(command_id, texture1)) {
index = 1;
} else {
index = 2;
}
auto config = Pica::registers.GetTextures()[index].config;
auto format = Pica::registers.GetTextures()[index].format;
auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format);
// TODO: Instead, emit a signal here to be caught by the main window widget. // TODO: Instead, emit a signal here to be caught by the main window widget.
auto main_window = static_cast<QMainWindow*>(parent()); auto main_window = static_cast<QMainWindow*>(parent());
@ -237,10 +256,23 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) {
QWidget* new_info_widget; QWidget* new_info_widget;
const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt(); const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt();
if (COMMAND_IN_RANGE(command_id, texture0) ||
COMMAND_IN_RANGE(command_id, texture1) ||
COMMAND_IN_RANGE(command_id, texture2)) {
unsigned index;
if (COMMAND_IN_RANGE(command_id, texture0)) { if (COMMAND_IN_RANGE(command_id, texture0)) {
u8* src = Memory::GetPointer(Pica::registers.texture0.GetPhysicalAddress()); index = 0;
auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(Pica::registers.texture0, } else if (COMMAND_IN_RANGE(command_id, texture1)) {
Pica::registers.texture0_format); index = 1;
} else {
index = 2;
}
auto config = Pica::registers.GetTextures()[index].config;
auto format = Pica::registers.GetTextures()[index].format;
auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format);
u8* src = Memory::GetPointer(Pica::PAddrToVAddr(config.GetPhysicalAddress()));
new_info_widget = new TextureInfoWidget(src, info); new_info_widget = new TextureInfoWidget(src, info);
} else { } else {
new_info_widget = new QWidget; new_info_widget = new QWidget;

View file

@ -125,6 +125,7 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
setWidget(main_widget); setWidget(main_widget);
// Load current data - TODO: Make sure this works when emulation is not running // Load current data - TODO: Make sure this works when emulation is not running
if (debug_context && debug_context->at_breakpoint)
emit Update(); emit Update();
widget()->setEnabled(false); // TODO: Only enable if currently at breakpoint widget()->setEnabled(false); // TODO: Only enable if currently at breakpoint
} }
@ -198,7 +199,7 @@ void GraphicsFramebufferWidget::OnUpdate()
auto framebuffer = Pica::registers.framebuffer; auto framebuffer = Pica::registers.framebuffer;
using Framebuffer = decltype(framebuffer); using Framebuffer = decltype(framebuffer);
framebuffer_address = framebuffer.GetColorBufferAddress(); framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
framebuffer_width = framebuffer.GetWidth(); framebuffer_width = framebuffer.GetWidth();
framebuffer_height = framebuffer.GetHeight(); framebuffer_height = framebuffer.GetHeight();
framebuffer_format = static_cast<Format>(framebuffer.color_format); framebuffer_format = static_cast<Format>(framebuffer.color_format);
@ -223,7 +224,7 @@ void GraphicsFramebufferWidget::OnUpdate()
case Format::RGBA8: case Format::RGBA8:
{ {
QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
u32* color_buffer = (u32*)Memory::GetPointer(framebuffer_address); u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
for (unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned y = 0; y < framebuffer_height; ++y) {
for (unsigned x = 0; x < framebuffer_width; ++x) { for (unsigned x = 0; x < framebuffer_width; ++x) {
u32 value = *(color_buffer + x + y * framebuffer_width); u32 value = *(color_buffer + x + y * framebuffer_width);
@ -238,7 +239,7 @@ void GraphicsFramebufferWidget::OnUpdate()
case Format::RGB8: case Format::RGB8:
{ {
QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
u8* color_buffer = Memory::GetPointer(framebuffer_address); u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
for (unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned y = 0; y < framebuffer_height; ++y) {
for (unsigned x = 0; x < framebuffer_width; ++x) { for (unsigned x = 0; x < framebuffer_width; ++x) {
u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width; u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width;
@ -253,7 +254,7 @@ void GraphicsFramebufferWidget::OnUpdate()
case Format::RGBA5551: case Format::RGBA5551:
{ {
QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
u32* color_buffer = (u32*)Memory::GetPointer(framebuffer_address); u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
for (unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned y = 0; y < framebuffer_height; ++y) {
for (unsigned x = 0; x < framebuffer_width; ++x) { for (unsigned x = 0; x < framebuffer_width; ++x) {
u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2);

View file

@ -142,7 +142,7 @@ public:
__forceinline BitField& operator=(T val) __forceinline BitField& operator=(T val)
{ {
storage = (storage & ~GetMask()) | (((StorageType)val << position) & GetMask()); Assign(val);
return *this; return *this;
} }
@ -151,6 +151,10 @@ public:
return Value(); return Value();
} }
__forceinline void Assign(const T& value) {
storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask());
}
__forceinline T Value() const __forceinline T Value() const
{ {
if (std::numeric_limits<T>::is_signed) if (std::numeric_limits<T>::is_signed)

View file

@ -56,10 +56,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
const auto& attribute_config = registers.vertex_attributes; const auto& attribute_config = registers.vertex_attributes;
const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); const u32 base_address = attribute_config.GetPhysicalBaseAddress();
// Information about internal vertex attributes // Information about internal vertex attributes
const u8* vertex_attribute_sources[16]; u32 vertex_attribute_sources[16];
std::fill(vertex_attribute_sources, &vertex_attribute_sources[16], 0xdeadbeef);
u32 vertex_attribute_strides[16]; u32 vertex_attribute_strides[16];
u32 vertex_attribute_formats[16]; u32 vertex_attribute_formats[16];
u32 vertex_attribute_elements[16]; u32 vertex_attribute_elements[16];
@ -69,7 +70,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
for (int loader = 0; loader < 12; ++loader) { for (int loader = 0; loader < 12; ++loader) {
const auto& loader_config = attribute_config.attribute_loaders[loader]; const auto& loader_config = attribute_config.attribute_loaders[loader];
const u8* load_address = base_address + loader_config.data_offset; u32 load_address = base_address + loader_config.data_offset;
// TODO: What happens if a loader overwrites a previous one's data? // TODO: What happens if a loader overwrites a previous one's data?
for (unsigned component = 0; component < loader_config.component_count; ++component) { for (unsigned component = 0; component < loader_config.component_count; ++component) {
@ -87,7 +88,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
const auto& index_info = registers.index_array; const auto& index_info = registers.index_array;
const u8* index_address_8 = (u8*)base_address + index_info.offset; const u8* index_address_8 = Memory::GetPointer(PAddrToVAddr(base_address + index_info.offset));
const u16* index_address_16 = (u16*)index_address_8; const u16* index_address_16 = (u16*)index_address_8;
bool index_u16 = (bool)index_info.format; bool index_u16 = (bool)index_info.format;
@ -108,7 +109,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]));
// TODO(neobrain): Ocarina of Time 3D has GetNumTotalAttributes return 8,
// yet only provides 2 valid source data addresses. We need to figure out
// what's wrong there; until then, we just skip the component when address lookup fails.
if (srcdata == nullptr)
continue;
const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata : const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata :
(vertex_attribute_formats[i] == 1) ? *(u8*)srcdata : (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata :
(vertex_attribute_formats[i] == 2) ? *(s16*)srcdata : (vertex_attribute_formats[i] == 2) ? *(s16*)srcdata :
@ -116,13 +124,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
input.attr[i][comp] = float24::FromFloat32(srcval); input.attr[i][comp] = float24::FromFloat32(srcval);
LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
comp, i, vertex, index, comp, i, vertex, index,
attribute_config.GetBaseAddress(), attribute_config.GetPhysicalBaseAddress(),
vertex_attribute_sources[i] - base_address, vertex_attribute_sources[i] - base_address,
srcdata - vertex_attribute_sources[i], vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
input.attr[i][comp].ToFloat32()); input.attr[i][comp].ToFloat32());
} }
} }
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
// NOTE: When dumping geometry, we simply assume that the first input attribute // NOTE: When dumping geometry, we simply assume that the first input attribute
// corresponds to the position for now. // corresponds to the position for now.
DebugUtils::GeometryDumper::Vertex dumped_vertex = { DebugUtils::GeometryDumper::Vertex dumped_vertex = {
@ -151,6 +162,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
break; break;
} }
case PICA_REG_INDEX(vs_bool_uniforms):
for (unsigned i = 0; i < 16; ++i)
VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i));
break;
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2):
case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3):

View file

@ -14,6 +14,8 @@
#include <png.h> #include <png.h>
#endif #endif
#include <nihstro/shader_binary.h>
#include "common/log.h" #include "common/log.h"
#include "common/file_util.h" #include "common/file_util.h"
@ -22,6 +24,10 @@
#include "debug_utils.h" #include "debug_utils.h"
using nihstro::DVLBHeader;
using nihstro::DVLEHeader;
using nihstro::DVLPHeader;
namespace Pica { namespace Pica {
void DebugContext::OnEvent(Event event, void* data) { void DebugContext::OnEvent(Event event, void* data) {
@ -98,65 +104,6 @@ void GeometryDumper::Dump() {
} }
} }
#pragma pack(1)
struct DVLBHeader {
enum : u32 {
MAGIC_WORD = 0x424C5644, // "DVLB"
};
u32 magic_word;
u32 num_programs;
// u32 dvle_offset_table[];
};
static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size");
struct DVLPHeader {
enum : u32 {
MAGIC_WORD = 0x504C5644, // "DVLP"
};
u32 magic_word;
u32 version;
u32 binary_offset; // relative to DVLP start
u32 binary_size_words;
u32 swizzle_patterns_offset;
u32 swizzle_patterns_num_entries;
u32 unk2;
};
static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size");
struct DVLEHeader {
enum : u32 {
MAGIC_WORD = 0x454c5644, // "DVLE"
};
enum class ShaderType : u8 {
VERTEX = 0,
GEOMETRY = 1,
};
u32 magic_word;
u16 pad1;
ShaderType type;
u8 pad2;
u32 main_offset_words; // offset within binary blob
u32 endmain_offset_words;
u32 pad3;
u32 pad4;
u32 constant_table_offset;
u32 constant_table_size; // number of entries
u32 label_table_offset;
u32 label_table_size;
u32 output_register_table_offset;
u32 output_register_table_size;
u32 uniform_table_offset;
u32 uniform_table_size;
u32 symbol_table_offset;
u32 symbol_table_size;
};
static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size");
#pragma pack()
void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
u32 main_offset, const Regs::VSOutputAttributes* output_attributes) u32 main_offset, const Regs::VSOutputAttributes* output_attributes)
@ -276,8 +223,8 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data
dvlp.binary_size_words = binary_size; dvlp.binary_size_words = binary_size;
QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); QueueForWriting((u8*)binary_data, binary_size * sizeof(u32));
dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; dvlp.swizzle_info_offset = write_offset - dvlp_offset;
dvlp.swizzle_patterns_num_entries = swizzle_size; dvlp.swizzle_info_num_entries = swizzle_size;
u32 dummy = 0; u32 dummy = 0;
for (unsigned int i = 0; i < swizzle_size; ++i) { for (unsigned int i = 0; i < swizzle_size; ++i) {
QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i]));
@ -356,10 +303,29 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
return std::move(ret); return std::move(ret);
} }
const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info) { const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
_dbg_assert_(Debug_GPU, info.format == Pica::Regs::TextureFormat::RGB8);
// Cf. rasterizer code for an explanation of this algorithm. // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
// of which is composed of four 2x2 subtiles each of which is composed of four texels.
// Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
// texels are laid out in a 2x2 subtile like this:
// 2 3
// 0 1
//
// The full 8x8 tile has the texels arranged like this:
//
// 42 43 46 47 58 59 62 63
// 40 41 44 45 56 57 60 61
// 34 35 38 39 50 51 54 55
// 32 33 36 37 48 49 52 53
// 10 11 14 15 26 27 30 31
// 08 09 12 13 24 25 28 29
// 02 03 06 07 18 19 22 23
// 00 01 04 05 16 17 20 21
// TODO(neobrain): Not sure if this swizzling pattern is used for all textures.
// To be flexible in case different but similar patterns are used, we keep this
// somewhat inefficient code around for now.
int texel_index_within_tile = 0; int texel_index_within_tile = 0;
for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
int sub_tile_width = 1 << block_size_index; int sub_tile_width = 1 << block_size_index;
@ -376,19 +342,134 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
int coarse_x = (x / block_width) * block_width; int coarse_x = (x / block_width) * block_width;
int coarse_y = (y / block_height) * block_height; int coarse_y = (y / block_height) * block_height;
switch (info.format) {
case Regs::TextureFormat::RGBA8:
{
const u8* source_ptr = source + coarse_x * block_height * 4 + coarse_y * info.stride + texel_index_within_tile * 4;
return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] };
}
case Regs::TextureFormat::RGB8:
{
const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3; const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3;
return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; return { source_ptr[2], source_ptr[1], source_ptr[0], 255 };
} }
case Regs::TextureFormat::RGBA5551:
{
const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2);
u8 r = (source_ptr >> 11) & 0x1F;
u8 g = ((source_ptr) >> 6) & 0x1F;
u8 b = (source_ptr >> 1) & 0x1F;
u8 a = source_ptr & 1;
return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255));
}
case Regs::TextureFormat::RGB565:
{
const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2);
u8 r = (source_ptr >> 11) & 0x1F;
u8 g = ((source_ptr) >> 5) & 0x3F;
u8 b = (source_ptr) & 0x1F;
return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255);
}
case Regs::TextureFormat::RGBA4:
{
const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2;
u8 r = source_ptr[1] >> 4;
u8 g = source_ptr[1] & 0xFF;
u8 b = source_ptr[0] >> 4;
u8 a = source_ptr[0] & 0xFF;
r = (r << 4) | r;
g = (g << 4) | g;
b = (b << 4) | b;
a = (a << 4) | a;
return { r, g, b, disable_alpha ? (u8)255 : a };
}
case Regs::TextureFormat::IA8:
{
const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2;
// TODO: component order not verified
if (disable_alpha) {
// Show intensity as red, alpha as green
return { source_ptr[0], source_ptr[1], 0, 255 };
} else {
return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]};
}
}
case Regs::TextureFormat::I8:
{
const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile;
return { *source_ptr, *source_ptr, *source_ptr, 255 };
}
case Regs::TextureFormat::A8:
{
const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile;
if (disable_alpha) {
return { *source_ptr, *source_ptr, *source_ptr, 255 };
} else {
return { 0, 0, 0, *source_ptr };
}
}
case Regs::TextureFormat::IA4:
{
const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2;
// TODO: component order not verified
u8 i = (*source_ptr) & 0xF;
u8 a = ((*source_ptr) & 0xF0) >> 4;
a |= a << 4;
i |= i << 4;
if (disable_alpha) {
// Show intensity as red, alpha as green
return { i, a, 0, 255 };
} else {
return { i, i, i, a };
}
}
case Regs::TextureFormat::A4:
{
const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2;
// TODO: component order not verified
u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4);
a |= a << 4;
if (disable_alpha) {
return { *source_ptr, *source_ptr, *source_ptr, 255 };
} else {
return { 0, 0, 0, *source_ptr };
}
}
default:
LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format);
_dbg_assert_(HW_GPU, 0);
return {};
}
}
TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
const Regs::TextureFormat& format) const Regs::TextureFormat& format)
{ {
TextureInfo info; TextureInfo info;
info.address = config.GetPhysicalAddress(); info.physical_address = config.GetPhysicalAddress();
info.width = config.width; info.width = config.width;
info.height = config.height; info.height = config.height;
info.format = format; info.format = format;
info.stride = Pica::Regs::BytesPerPixel(info.format) * info.width; info.stride = Pica::Regs::NibblesPerPixel(info.format) * info.width / 2;
return info; return info;
} }
@ -499,26 +580,32 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
for (size_t index = 0; index < stages.size(); ++index) { for (size_t index = 0; index < stages.size(); ++index) {
const auto& tev_stage = stages[index]; const auto& tev_stage = stages[index];
const std::map<Source, std::string> source_map = { static const std::map<Source, std::string> source_map = {
{ Source::PrimaryColor, "PrimaryColor" }, { Source::PrimaryColor, "PrimaryColor" },
{ Source::Texture0, "Texture0" }, { Source::Texture0, "Texture0" },
{ Source::Texture1, "Texture1" },
{ Source::Texture2, "Texture2" },
{ Source::Constant, "Constant" }, { Source::Constant, "Constant" },
{ Source::Previous, "Previous" }, { Source::Previous, "Previous" },
}; };
const std::map<ColorModifier, std::string> color_modifier_map = { static const std::map<ColorModifier, std::string> color_modifier_map = {
{ ColorModifier::SourceColor, { "%source.rgb" } } { ColorModifier::SourceColor, { "%source.rgb" } },
{ ColorModifier::SourceAlpha, { "%source.aaa" } },
}; };
const std::map<AlphaModifier, std::string> alpha_modifier_map = { static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
{ AlphaModifier::SourceAlpha, "%source.a" } { AlphaModifier::SourceAlpha, "%source.a" },
{ AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" },
}; };
std::map<Operation, std::string> combiner_map = { static const std::map<Operation, std::string> combiner_map = {
{ Operation::Replace, "%source1" }, { Operation::Replace, "%source1" },
{ Operation::Modulate, "(%source1 * %source2) / 255" }, { Operation::Modulate, "(%source1 * %source2) / 255" },
{ Operation::Add, "(%source1 + %source2)" },
{ Operation::Lerp, "lerp(%source1, %source2, %source3)" },
}; };
auto ReplacePattern = static auto ReplacePattern =
[](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string {
size_t start = input.find(pattern); size_t start = input.find(pattern);
if (start == std::string::npos) if (start == std::string::npos)
@ -528,8 +615,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
ret.replace(start, pattern.length(), replacement); ret.replace(start, pattern.length(), replacement);
return ret; return ret;
}; };
auto GetColorSourceStr = static auto GetColorSourceStr =
[&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) { [](const Source& src, const ColorModifier& modifier) {
auto src_it = source_map.find(src); auto src_it = source_map.find(src);
std::string src_str = "Unknown"; std::string src_str = "Unknown";
if (src_it != source_map.end()) if (src_it != source_map.end())
@ -542,8 +629,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
return ReplacePattern(modifier_str, "%source", src_str); return ReplacePattern(modifier_str, "%source", src_str);
}; };
auto GetColorCombinerStr = static auto GetColorCombinerStr =
[&](const Regs::TevStageConfig& tev_stage) { [](const Regs::TevStageConfig& tev_stage) {
auto op_it = combiner_map.find(tev_stage.color_op); auto op_it = combiner_map.find(tev_stage.color_op);
std::string op_str = "Unknown op (%source1, %source2, %source3)"; std::string op_str = "Unknown op (%source1, %source2, %source3)";
if (op_it != combiner_map.end()) if (op_it != combiner_map.end())
@ -553,8 +640,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2)); op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2));
return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3)); return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3));
}; };
auto GetAlphaSourceStr = static auto GetAlphaSourceStr =
[&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) { [](const Source& src, const AlphaModifier& modifier) {
auto src_it = source_map.find(src); auto src_it = source_map.find(src);
std::string src_str = "Unknown"; std::string src_str = "Unknown";
if (src_it != source_map.end()) if (src_it != source_map.end())
@ -567,8 +654,8 @@ void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
return ReplacePattern(modifier_str, "%source", src_str); return ReplacePattern(modifier_str, "%source", src_str);
}; };
auto GetAlphaCombinerStr = static auto GetAlphaCombinerStr =
[&](const Regs::TevStageConfig& tev_stage) { [](const Regs::TevStageConfig& tev_stage) {
auto op_it = combiner_map.find(tev_stage.alpha_op); auto op_it = combiner_map.find(tev_stage.alpha_op);
std::string op_str = "Unknown op (%source1, %source2, %source3)"; std::string op_str = "Unknown op (%source1, %source2, %source3)";
if (op_it != combiner_map.end()) if (op_it != combiner_map.end())

View file

@ -26,6 +26,7 @@ public:
CommandProcessed, CommandProcessed,
IncomingPrimitiveBatch, IncomingPrimitiveBatch,
FinishedPrimitiveBatch, FinishedPrimitiveBatch,
VertexLoaded,
NumEvents NumEvents
}; };
@ -192,7 +193,7 @@ void OnPicaRegWrite(u32 id, u32 value);
std::unique_ptr<PicaTrace> FinishPicaTracing(); std::unique_ptr<PicaTrace> FinishPicaTracing();
struct TextureInfo { struct TextureInfo {
unsigned int address; PAddr physical_address;
int width; int width;
int height; int height;
int stride; int stride;
@ -202,7 +203,17 @@ struct TextureInfo {
const Pica::Regs::TextureFormat& format); const Pica::Regs::TextureFormat& format);
}; };
const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info); /**
* Lookup texel located at the given coordinates and return an RGBA vector of its color.
* @param source Source pointer to read data from
* @param s,t Texture coordinates to read from
* @param info TextureInfo object describing the texture setup
* @param disable_alpha This is used for debug widgets which use this method to display textures without providing a good way to visualize alpha by themselves. If true, this will return 255 for the alpha component, and either drop the information entirely or store it in an "unused" color channel.
* @todo Eventually we should get rid of the disable_alpha parameter.
*/
const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info,
bool disable_alpha = false);
void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);

View file

@ -8,6 +8,7 @@
#include <cstddef> #include <cstddef>
#include <initializer_list> #include <initializer_list>
#include <map> #include <map>
#include <vector>
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
@ -104,6 +105,11 @@ struct Regs {
INSERT_PADDING_WORDS(0x17); INSERT_PADDING_WORDS(0x17);
struct TextureConfig { struct TextureConfig {
// Texture coordinate wrapping mode, as stored in the wrap_s / wrap_t bitfields of this
// TextureConfig. Only the two encodings listed here are modelled; the rasterizer logs an
// error for any other value it encounters.
enum WrapMode : u32 {
// Coordinate is clamped into [0, size - 1].
ClampToEdge = 0,
// Coordinate is taken modulo the texture size.
Repeat = 2,
};
INSERT_PADDING_WORDS(0x1); INSERT_PADDING_WORDS(0x1);
union { union {
@ -111,12 +117,17 @@ struct Regs {
BitField<16, 16, u32> width; BitField<16, 16, u32> width;
}; };
INSERT_PADDING_WORDS(0x2); union {
BitField< 8, 2, WrapMode> wrap_s;
BitField<11, 2, WrapMode> wrap_t;
};
INSERT_PADDING_WORDS(0x1);
u32 address; u32 address;
u32 GetPhysicalAddress() const { u32 GetPhysicalAddress() const {
return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR; return DecodeAddressRegister(address);
} }
// texture1 and texture2 store the texture format directly after the address // texture1 and texture2 store the texture format directly after the address
@ -131,36 +142,70 @@ struct Regs {
RGBA5551 = 2, RGBA5551 = 2,
RGB565 = 3, RGB565 = 3,
RGBA4 = 4, RGBA4 = 4,
IA8 = 5,
I8 = 7,
A8 = 8,
IA4 = 9,
A4 = 11,
// TODO: Support for the other formats is not implemented, yet. // TODO: Support for the other formats is not implemented, yet.
// Seems like they are luminance formats and compressed textures. // Seems like they are luminance formats and compressed textures.
}; };
static unsigned BytesPerPixel(TextureFormat format) { static unsigned NibblesPerPixel(TextureFormat format) {
switch (format) { switch (format) {
case TextureFormat::RGBA8: case TextureFormat::RGBA8:
return 4; return 8;
case TextureFormat::RGB8: case TextureFormat::RGB8:
return 3; return 6;
case TextureFormat::RGBA5551: case TextureFormat::RGBA5551:
case TextureFormat::RGB565: case TextureFormat::RGB565:
case TextureFormat::RGBA4: case TextureFormat::RGBA4:
return 2; case TextureFormat::IA8:
return 4;
default: case TextureFormat::A4:
// placeholder for yet unknown formats
return 1; return 1;
case TextureFormat::I8:
case TextureFormat::A8:
case TextureFormat::IA4:
default: // placeholder for yet unknown formats
return 2;
} }
} }
BitField< 0, 1, u32> texturing_enable; union {
BitField< 0, 1, u32> texture0_enable;
BitField< 1, 1, u32> texture1_enable;
BitField< 2, 1, u32> texture2_enable;
};
TextureConfig texture0; TextureConfig texture0;
INSERT_PADDING_WORDS(0x8); INSERT_PADDING_WORDS(0x8);
BitField<0, 4, TextureFormat> texture0_format; BitField<0, 4, TextureFormat> texture0_format;
INSERT_PADDING_WORDS(0x2);
TextureConfig texture1;
BitField<0, 4, TextureFormat> texture1_format;
INSERT_PADDING_WORDS(0x2);
TextureConfig texture2;
BitField<0, 4, TextureFormat> texture2_format;
INSERT_PADDING_WORDS(0x21);
INSERT_PADDING_WORDS(0x31); struct FullTextureConfig {
const bool enabled;
const TextureConfig config;
const TextureFormat format;
};
// Bundles the state of the three texture units into one array, indexed by unit number.
// Each entry holds the unit's enable bit (converted to bool), a copy of its TextureConfig
// register block and its texture format. The array is built and returned by value on every
// call, so the entries are snapshots rather than references into the register file.
const std::array<FullTextureConfig, 3> GetTextures() const {
return {{
{ static_cast<bool>(texture0_enable), texture0, texture0_format },
{ static_cast<bool>(texture1_enable), texture1, texture1_format },
{ static_cast<bool>(texture2_enable), texture2, texture2_format }
}};
}
// 0xc0-0xff: Texture Combiner (akin to glTexEnv) // 0xc0-0xff: Texture Combiner (akin to glTexEnv)
struct TevStageConfig { struct TevStageConfig {
@ -282,11 +327,11 @@ struct Regs {
INSERT_PADDING_WORDS(0x1); INSERT_PADDING_WORDS(0x1);
inline u32 GetColorBufferAddress() const { inline u32 GetColorBufferPhysicalAddress() const {
return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(color_buffer_address)); return DecodeAddressRegister(color_buffer_address);
} }
inline u32 GetDepthBufferAddress() const { inline u32 GetDepthBufferPhysicalAddress() const {
return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(depth_buffer_address)); return DecodeAddressRegister(depth_buffer_address);
} }
inline u32 GetWidth() const { inline u32 GetWidth() const {
@ -310,9 +355,8 @@ struct Regs {
BitField<0, 29, u32> base_address; BitField<0, 29, u32> base_address;
inline u32 GetBaseAddress() const { u32 GetPhysicalBaseAddress() const {
// TODO: Ugly, should fix PhysicalToVirtualAddress instead return DecodeAddressRegister(base_address);
return DecodeAddressRegister(base_address) - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR;
} }
// Descriptor for internal vertex attributes // Descriptor for internal vertex attributes
@ -448,7 +492,11 @@ struct Regs {
BitField<8, 2, TriangleTopology> triangle_topology; BitField<8, 2, TriangleTopology> triangle_topology;
INSERT_PADDING_WORDS(0x5b); INSERT_PADDING_WORDS(0x51);
BitField<0, 16, u32> vs_bool_uniforms;
INSERT_PADDING_WORDS(0x9);
// Offset to shader program entry point (in words) // Offset to shader program entry point (in words)
BitField<0, 16, u32> vs_main_offset; BitField<0, 16, u32> vs_main_offset;
@ -556,9 +604,13 @@ struct Regs {
ADD_FIELD(viewport_depth_range); ADD_FIELD(viewport_depth_range);
ADD_FIELD(viewport_depth_far_plane); ADD_FIELD(viewport_depth_far_plane);
ADD_FIELD(viewport_corner); ADD_FIELD(viewport_corner);
ADD_FIELD(texturing_enable); ADD_FIELD(texture0_enable);
ADD_FIELD(texture0); ADD_FIELD(texture0);
ADD_FIELD(texture0_format); ADD_FIELD(texture0_format);
ADD_FIELD(texture1);
ADD_FIELD(texture1_format);
ADD_FIELD(texture2);
ADD_FIELD(texture2_format);
ADD_FIELD(tev_stage0); ADD_FIELD(tev_stage0);
ADD_FIELD(tev_stage1); ADD_FIELD(tev_stage1);
ADD_FIELD(tev_stage2); ADD_FIELD(tev_stage2);
@ -572,6 +624,7 @@ struct Regs {
ADD_FIELD(trigger_draw); ADD_FIELD(trigger_draw);
ADD_FIELD(trigger_draw_indexed); ADD_FIELD(trigger_draw_indexed);
ADD_FIELD(triangle_topology); ADD_FIELD(triangle_topology);
ADD_FIELD(vs_bool_uniforms);
ADD_FIELD(vs_main_offset); ADD_FIELD(vs_main_offset);
ADD_FIELD(vs_input_register_map); ADD_FIELD(vs_input_register_map);
ADD_FIELD(vs_uniform_setup); ADD_FIELD(vs_uniform_setup);
@ -622,9 +675,13 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e);
ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
ASSERT_REG_POSITION(viewport_corner, 0x68); ASSERT_REG_POSITION(viewport_corner, 0x68);
ASSERT_REG_POSITION(texturing_enable, 0x80); ASSERT_REG_POSITION(texture0_enable, 0x80);
ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0, 0x81);
ASSERT_REG_POSITION(texture0_format, 0x8e); ASSERT_REG_POSITION(texture0_format, 0x8e);
ASSERT_REG_POSITION(texture1, 0x91);
ASSERT_REG_POSITION(texture1_format, 0x96);
ASSERT_REG_POSITION(texture2, 0x99);
ASSERT_REG_POSITION(texture2_format, 0x9e);
ASSERT_REG_POSITION(tev_stage0, 0xc0); ASSERT_REG_POSITION(tev_stage0, 0xc0);
ASSERT_REG_POSITION(tev_stage1, 0xc8); ASSERT_REG_POSITION(tev_stage1, 0xc8);
ASSERT_REG_POSITION(tev_stage2, 0xd0); ASSERT_REG_POSITION(tev_stage2, 0xd0);
@ -638,6 +695,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228);
ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw, 0x22e);
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(triangle_topology, 0x25e);
ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0);
ASSERT_REG_POSITION(vs_main_offset, 0x2ba); ASSERT_REG_POSITION(vs_main_offset, 0x2ba);
ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); ASSERT_REG_POSITION(vs_input_register_map, 0x2bb);
ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0);
@ -719,6 +777,14 @@ struct float24 {
return ToFloat32() <= flt.ToFloat32(); return ToFloat32() <= flt.ToFloat32();
} }
// Two float24 values are equal when their ToFloat32() conversions compare equal.
bool operator == (const float24& flt) const {
    const auto lhs = ToFloat32();
    const auto rhs = flt.ToFloat32();
    return lhs == rhs;
}
// Inequality is the exact negation of comparing the ToFloat32() conversions for equality.
bool operator != (const float24& flt) const {
    return !(ToFloat32() == flt.ToFloat32());
}
private: private:
// Stored as a regular float, merely for convenience // Stored as a regular float, merely for convenience
// TODO: Perform proper arithmetic on this! // TODO: Perform proper arithmetic on this!
@ -736,5 +802,15 @@ union CommandHeader {
BitField<31, 1, u32> group_commands; BitField<31, 1, u32> group_commands;
}; };
// TODO: Ugly, should fix PhysicalToVirtualAddress instead
// Maps a physical address to the matching virtual address:
//  - addresses inside the VRAM window are rebased onto Memory::VRAM_VADDR,
//  - addresses inside the FCRAM window are rebased onto Memory::HEAP_LINEAR_VADDR,
//  - anything else yields 0.
inline static u32 PAddrToVAddr(u32 addr) {
    const bool in_vram = addr >= Memory::VRAM_PADDR &&
                         addr < Memory::VRAM_PADDR + Memory::VRAM_SIZE;
    if (in_vram)
        return addr - Memory::VRAM_PADDR + Memory::VRAM_VADDR;

    const bool in_fcram = addr >= Memory::FCRAM_PADDR &&
                          addr < Memory::FCRAM_PADDR + Memory::FCRAM_SIZE;
    if (in_fcram)
        return addr - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR;

    // Not covered by either window.
    return 0;
}
} // namespace } // namespace

View file

@ -30,20 +30,27 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandl
} }
break; break;
case Regs::TriangleTopology::Strip:
case Regs::TriangleTopology::Fan: case Regs::TriangleTopology::Fan:
if (buffer_index == 2) { if (strip_ready) {
buffer_index = 0; // TODO: Should be "buffer[0], buffer[1], vtx" instead!
// Not quite sure why we need this order for things to show up properly.
// Maybe a bug in the rasterizer?
triangle_handler(buffer[1], buffer[0], vtx);
}
buffer[buffer_index] = vtx;
triangle_handler(buffer[0], buffer[1], vtx); if (topology == Regs::TriangleTopology::Strip) {
strip_ready |= (buffer_index == 1);
buffer[1] = vtx; buffer_index = !buffer_index;
} else { } else if (topology == Regs::TriangleTopology::Fan) {
buffer[buffer_index++] = vtx; buffer_index = 1;
strip_ready = true;
} }
break; break;
default: default:
LOG_ERROR(Render_Software, "Unknown triangle topology %x:", (int)topology); LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology);
break; break;
} }
} }

View file

@ -37,6 +37,7 @@ private:
int buffer_index; int buffer_index;
VertexType buffer[2]; VertexType buffer[2];
bool strip_ready = false;
}; };

View file

@ -18,7 +18,7 @@ namespace Pica {
namespace Rasterizer { namespace Rasterizer {
static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress()); u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())));
u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b();
// Assuming RGBA8 format until actual framebuffer format handling is implemented // Assuming RGBA8 format until actual framebuffer format handling is implemented
@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
} }
static u32 GetDepth(int x, int y) { static u32 GetDepth(int x, int y) {
u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
// Assuming 16-bit depth buffer format until actual format handling is implemented // Assuming 16-bit depth buffer format until actual format handling is implemented
return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); return *(depth_buffer + x + y * registers.framebuffer.GetWidth());
} }
static void SetDepth(int x, int y, u16 value) { static void SetDepth(int x, int y, u16 value) {
u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())));
// Assuming 16-bit depth buffer format until actual format handling is implemented // Assuming 16-bit depth buffer format until actual format handling is implemented
*(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value;
@ -167,60 +167,48 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
(u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255)
}; };
Math::Vec4<u8> texture_color{}; Math::Vec2<float24> uv[3];
float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u());
float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v());
if (registers.texturing_enable) { uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u());
// Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v());
// of which is composed of four 2x2 subtiles each of which is composed of four texels. uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u());
// Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v());
// texels are laid out in a 2x2 subtile like this:
// 2 3
// 0 1
//
// The full 8x8 tile has the texels arranged like this:
//
// 42 43 46 47 58 59 62 63
// 40 41 44 45 56 57 60 61
// 34 35 38 39 50 51 54 55
// 32 33 36 37 48 49 52 53
// 10 11 14 15 26 27 30 31
// 08 09 12 13 24 25 28 29
// 02 03 06 07 18 19 22 23
// 00 01 04 05 16 17 20 21
// TODO: This is currently hardcoded for RGB8 Math::Vec4<u8> texture_color[3]{};
u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); for (int i = 0; i < 3; ++i) {
auto texture = registers.GetTextures()[i];
if (!texture.enabled)
continue;
// TODO(neobrain): Not sure if this swizzling pattern is used for all textures. _dbg_assert_(HW_GPU, 0 != texture.config.address);
// To be flexible in case different but similar patterns are used, we keep this
// somewhat inefficient code around for now.
int s = (int)(u * float24::FromFloat32(static_cast<float>(registers.texture0.width))).ToFloat32();
int t = (int)(v * float24::FromFloat32(static_cast<float>(registers.texture0.height))).ToFloat32();
int texel_index_within_tile = 0;
for (int block_size_index = 0; block_size_index < 3; ++block_size_index) {
int sub_tile_width = 1 << block_size_index;
int sub_tile_height = 1 << block_size_index;
int sub_tile_index = (s & sub_tile_width) << block_size_index; int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
texel_index_within_tile += sub_tile_index; auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
switch (mode) {
case Regs::TextureConfig::ClampToEdge:
val = std::max(val, 0);
val = std::min(val, (int)size - 1);
return val;
case Regs::TextureConfig::Repeat:
return (int)(((unsigned)val) % size);
default:
LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode);
_dbg_assert_(HW_GPU, 0);
return 0;
} }
};
// Wrap with the registers of the texture unit currently being sampled. Using
// registers.texture0 here would apply unit 0's wrap modes and dimensions to units 1 and 2,
// although s and t were scaled by texture.config.width / height above.
s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
t = GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
const int block_width = 8; u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress()));
const int block_height = 8; auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
int coarse_s = (s / block_width) * block_width; texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
int coarse_t = (t / block_height) * block_height; DebugUtils::DumpTexture(texture.config, texture_data);
const int row_stride = registers.texture0.width * 3;
u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3;
texture_color.r() = source_ptr[2];
texture_color.g() = source_ptr[1];
texture_color.b() = source_ptr[0];
texture_color.a() = 0xFF;
DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data);
} }
// Texture environment - consists of 6 stages of color and alpha combining. // Texture environment - consists of 6 stages of color and alpha combining.
@ -237,22 +225,29 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
using AlphaModifier = Regs::TevStageConfig::AlphaModifier; using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
using Operation = Regs::TevStageConfig::Operation; using Operation = Regs::TevStageConfig::Operation;
auto GetColorSource = [&](Source source) -> Math::Vec3<u8> { auto GetColorSource = [&](Source source) -> Math::Vec4<u8> {
switch (source) { switch (source) {
case Source::PrimaryColor: case Source::PrimaryColor:
return primary_color.rgb(); return primary_color;
case Source::Texture0: case Source::Texture0:
return texture_color.rgb(); return texture_color[0];
case Source::Texture1:
return texture_color[1];
case Source::Texture2:
return texture_color[2];
case Source::Constant: case Source::Constant:
return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
case Source::Previous: case Source::Previous:
return combiner_output.rgb(); return combiner_output;
default: default:
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
_dbg_assert_(HW_GPU, 0);
return {}; return {};
} }
}; };
@ -263,7 +258,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
return primary_color.a(); return primary_color.a();
case Source::Texture0: case Source::Texture0:
return texture_color.a(); return texture_color[0].a();
case Source::Texture1:
return texture_color[1].a();
case Source::Texture2:
return texture_color[2].a();
case Source::Constant: case Source::Constant:
return tev_stage.const_a; return tev_stage.const_a;
@ -273,17 +274,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
default: default:
LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source); LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source);
_dbg_assert_(HW_GPU, 0);
return 0; return 0;
} }
}; };
auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> { auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
switch (factor) switch (factor)
{ {
case ColorModifier::SourceColor: case ColorModifier::SourceColor:
return values; return values.rgb();
case ColorModifier::SourceAlpha:
return { values.a(), values.a(), values.a() };
default: default:
LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor);
_dbg_assert_(HW_GPU, 0);
return {}; return {};
} }
}; };
@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
switch (factor) { switch (factor) {
case AlphaModifier::SourceAlpha: case AlphaModifier::SourceAlpha:
return value; return value;
case AlphaModifier::OneMinusSourceAlpha:
return 255 - value;
default: default:
LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor);
_dbg_assert_(HW_GPU, 0);
return 0; return 0;
} }
}; };
@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
case Operation::Modulate: case Operation::Modulate:
return ((input[0] * input[1]) / 255).Cast<u8>(); return ((input[0] * input[1]) / 255).Cast<u8>();
case Operation::Add:
{
auto result = input[0] + input[1];
result.r() = std::min(255, result.r());
result.g() = std::min(255, result.g());
result.b() = std::min(255, result.b());
return result.Cast<u8>();
}
case Operation::Lerp:
return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
default: default:
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
_dbg_assert_(HW_GPU, 0);
return {}; return {};
} }
}; };
@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
case Operation::Modulate: case Operation::Modulate:
return input[0] * input[1] / 255; return input[0] * input[1] / 255;
case Operation::Add:
return std::min(255, input[0] + input[1]);
case Operation::Lerp:
return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
default: default:
LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op);
_dbg_assert_(HW_GPU, 0);
return 0; return 0;
} }
}; };

View file

@ -2,16 +2,25 @@
// Licensed under GPLv2 // Licensed under GPLv2
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <stack>
#include <boost/range/algorithm.hpp> #include <boost/range/algorithm.hpp>
#include <common/file_util.h> #include <common/file_util.h>
#include <core/mem_map.h> #include <core/mem_map.h>
#include "debug_utils/debug_utils.h" #include <nihstro/shader_bytecode.h>
#include "pica.h" #include "pica.h"
#include "vertex_shader.h" #include "vertex_shader.h"
#include "debug_utils/debug_utils.h"
using nihstro::Instruction;
using nihstro::RegisterType;
using nihstro::SourceRegister;
using nihstro::SwizzlePattern;
namespace Pica { namespace Pica {
@ -19,13 +28,14 @@ namespace VertexShader {
static struct { static struct {
Math::Vec4<float24> f[96]; Math::Vec4<float24> f[96];
} shader_uniforms;
std::array<bool,16> b;
} shader_uniforms;
// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
// For now, we just keep these local arrays around. // For now, we just keep these local arrays around.
static u32 shader_memory[1024]; static std::array<u32, 1024> shader_memory;
static u32 swizzle_data[1024]; static std::array<u32, 1024> swizzle_data;
void SubmitShaderMemoryChange(u32 addr, u32 value) void SubmitShaderMemoryChange(u32 addr, u32 value)
{ {
@ -42,6 +52,21 @@ Math::Vec4<float24>& GetFloatUniform(u32 index)
return shader_uniforms.f[index]; return shader_uniforms.f[index];
} }
// Returns a mutable reference to the boolean shader uniform at the given index.
// The index is not range-checked against the 16-entry shader_uniforms.b array.
bool& GetBoolUniform(u32 index)
{
    auto& uniform = shader_uniforms.b[index];
    return uniform;
}
// Read-only access to the locally kept shader program words (shader_memory).
const std::array<u32, 1024>& GetShaderBinary()
{
    const auto& binary = shader_memory;
    return binary;
}
// Read-only access to the locally kept swizzle pattern words (swizzle_data).
const std::array<u32, 1024>& GetSwizzlePatterns()
{
    const auto& patterns = swizzle_data;
    return patterns;
}
struct VertexShaderState { struct VertexShaderState {
u32* program_counter; u32* program_counter;
@ -49,13 +74,23 @@ struct VertexShaderState {
float24* output_register_table[7*4]; float24* output_register_table[7*4];
Math::Vec4<float24> temporary_registers[16]; Math::Vec4<float24> temporary_registers[16];
bool status_registers[2]; bool conditional_code[2];
// Two Address registers and one loop counter
// TODO: How many bits do these actually have?
s32 address_registers[3];
enum { enum {
INVALID_ADDRESS = 0xFFFFFFFF INVALID_ADDRESS = 0xFFFFFFFF
}; };
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
u32* call_stack_pointer; struct CallStackElement {
u32 final_address;
u32 return_address;
};
// TODO: Is there a maximal size for this?
std::stack<CallStackElement> call_stack;
struct { struct {
u32 max_offset; // maximum program counter ever reached u32 max_offset; // maximum program counter ever reached
@ -64,25 +99,69 @@ struct VertexShaderState {
}; };
static void ProcessShaderCode(VertexShaderState& state) { static void ProcessShaderCode(VertexShaderState& state) {
// Placeholder for invalid inputs
static float24 dummy_vec4_float24[4];
while (true) { while (true) {
bool increment_pc = true; if (!state.call_stack.empty()) {
if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) {
state.program_counter = &shader_memory[state.call_stack.top().return_address];
state.call_stack.pop();
// TODO: Is "trying again" accurate to hardware?
continue;
}
}
bool exit_loop = false; bool exit_loop = false;
const Instruction& instr = *(const Instruction*)state.program_counter; const Instruction& instr = *(const Instruction*)state.program_counter;
state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()]
: (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x
: (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x
: nullptr;
const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()]
: &state.temporary_registers[instr.common.src2.GetIndex()].x;
float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
: (instr.common.dest < 0x10) ? nullptr
: (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
: nullptr;
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
const bool negate_src1 = (swizzle.negate != 0);
// Helper that redirects execution to a block of shader code.
//  - offset:           word index into shader_memory where the block starts
//  - num_instructions: length of the block; offset + num_instructions is pushed as the
//                      entry's final_address
//  - return_offset:    word index pushed as the entry's return_address
// The check at the top of the interpreter loop pops the entry and jumps to return_address
// once the program counter reaches final_address.
// Note: the `state` parameter shadows the enclosing function's `state`.
auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) {
state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
state.call_stack.push({ offset + num_instructions, return_offset });
};
u32 binary_offset = state.program_counter - shader_memory.data();
state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset);
// Resolves a source register to a pointer to its first float24 component.
// Input registers come from the input register table, temporaries from the shader state
// and float uniforms from shader_uniforms; any other register type resolves to the
// dummy placeholder vector.
auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
    const auto reg_type = source_reg.GetRegisterType();

    if (reg_type == RegisterType::Input)
        return state.input_register_table[source_reg.GetIndex()];

    if (reg_type == RegisterType::Temporary)
        return &state.temporary_registers[source_reg.GetIndex()].x;

    if (reg_type == RegisterType::FloatUniform)
        return &shader_uniforms.f[source_reg.GetIndex()].x;

    return dummy_vec4_float24;
};
switch (instr.opcode.GetInfo().type) {
case Instruction::OpCodeType::Arithmetic:
{
bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
if (is_inverted) {
// TODO: We don't really support this properly: For instance, the address register
// offset needs to be applied to SRC2 instead, etc.
// For now, we just abort in this situation.
LOG_CRITICAL(HW_GPU, "Bad condition...");
exit(0);
}
const int address_offset = (instr.common.address_register_index == 0)
? 0 : state.address_registers[instr.common.address_register_index - 1];
const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset);
const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted));
const bool negate_src1 = (swizzle.negate_src1 != false);
const bool negate_src2 = (swizzle.negate_src2 != false);
float24 src1[4] = { float24 src1[4] = {
src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(0)],
@ -96,17 +175,29 @@ static void ProcessShaderCode(VertexShaderState& state) {
src1[2] = src1[2] * float24::FromFloat32(-1); src1[2] = src1[2] * float24::FromFloat32(-1);
src1[3] = src1[3] * float24::FromFloat32(-1); src1[3] = src1[3] * float24::FromFloat32(-1);
} }
const float24 src2[4] = { float24 src2[4] = {
src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(0)],
src2_[(int)swizzle.GetSelectorSrc2(1)], src2_[(int)swizzle.GetSelectorSrc2(1)],
src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(2)],
src2_[(int)swizzle.GetSelectorSrc2(3)], src2_[(int)swizzle.GetSelectorSrc2(3)],
}; };
if (negate_src2) {
src2[0] = src2[0] * float24::FromFloat32(-1);
src2[1] = src2[1] * float24::FromFloat32(-1);
src2[2] = src2[2] * float24::FromFloat32(-1);
src2[3] = src2[3] * float24::FromFloat32(-1);
}
switch (instr.opcode) { float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
: (instr.common.dest < 0x10) ? dummy_vec4_float24
: (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
: dummy_vec4_float24;
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
switch (instr.opcode.EffectiveOpCode()) {
case Instruction::OpCode::ADD: case Instruction::OpCode::ADD:
{ {
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i)) if (!swizzle.DestComponentEnabled(i))
continue; continue;
@ -119,7 +210,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
case Instruction::OpCode::MUL: case Instruction::OpCode::MUL:
{ {
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i)) if (!swizzle.DestComponentEnabled(i))
continue; continue;
@ -130,10 +220,18 @@ static void ProcessShaderCode(VertexShaderState& state) {
break; break;
} }
case Instruction::OpCode::MAX:
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
dest[i] = std::max(src1[i], src2[i]);
}
break;
case Instruction::OpCode::DP3: case Instruction::OpCode::DP3:
case Instruction::OpCode::DP4: case Instruction::OpCode::DP4:
{ {
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
float24 dot = float24::FromFloat32(0.f); float24 dot = float24::FromFloat32(0.f);
int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
for (int i = 0; i < num_components; ++i) for (int i = 0; i < num_components; ++i)
@ -151,7 +249,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
// Reciprocal // Reciprocal
case Instruction::OpCode::RCP: case Instruction::OpCode::RCP:
{ {
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i)) if (!swizzle.DestComponentEnabled(i))
continue; continue;
@ -167,7 +264,6 @@ static void ProcessShaderCode(VertexShaderState& state) {
// Reciprocal Square Root // Reciprocal Square Root
case Instruction::OpCode::RSQ: case Instruction::OpCode::RSQ:
{ {
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i)) if (!swizzle.DestComponentEnabled(i))
continue; continue;
@ -180,9 +276,21 @@ static void ProcessShaderCode(VertexShaderState& state) {
break; break;
} }
case Instruction::OpCode::MOVA:
{
for (int i = 0; i < 2; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
// TODO: Figure out how the rounding is done on hardware
state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32());
}
break;
}
case Instruction::OpCode::MOV: case Instruction::OpCode::MOV:
{ {
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i)) if (!swizzle.DestComponentEnabled(i))
continue; continue;
@ -192,38 +300,136 @@ static void ProcessShaderCode(VertexShaderState& state) {
break; break;
} }
case Instruction::OpCode::RET: case Instruction::OpCode::CMP:
if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { for (int i = 0; i < 2; ++i) {
exit_loop = true; // TODO: Can you restrict to one compare via dest masking?
} else {
// Jump back to call stack position, invalidate call stack entry, move up call stack pointer
state.program_counter = &shader_memory[*state.call_stack_pointer];
*state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
}
auto compare_op = instr.common.compare_op;
auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value();
switch (op) {
case compare_op.Equal:
state.conditional_code[i] = (src1[i] == src2[i]);
break; break;
case Instruction::OpCode::CALL: case compare_op.NotEqual:
increment_pc = false; state.conditional_code[i] = (src1[i] != src2[i]);
_dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack));
*++state.call_stack_pointer = state.program_counter - shader_memory;
// TODO: Does this offset refer to the beginning of shader memory?
state.program_counter = &shader_memory[instr.flow_control.offset_words];
break; break;
case Instruction::OpCode::FLS: case compare_op.LessThan:
// TODO: Do whatever needs to be done here? state.conditional_code[i] = (src1[i] < src2[i]);
break;
case compare_op.LessEqual:
state.conditional_code[i] = (src1[i] <= src2[i]);
break;
case compare_op.GreaterThan:
state.conditional_code[i] = (src1[i] > src2[i]);
break;
case compare_op.GreaterEqual:
state.conditional_code[i] = (src1[i] >= src2[i]);
break; break;
default: default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast<int>(op));
(int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); break;
}
}
break;
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
_dbg_assert_(HW_GPU, 0);
break;
}
break;
}
default:
// Handle each instruction on its own
switch (instr.opcode) {
case Instruction::OpCode::END:
exit_loop = true;
break;
case Instruction::OpCode::CALL:
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
binary_offset + 1);
break;
case Instruction::OpCode::NOP:
break;
case Instruction::OpCode::IFU:
if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
call(state,
binary_offset + 1,
instr.flow_control.dest_offset - binary_offset - 1,
instr.flow_control.dest_offset + instr.flow_control.num_instructions);
} else {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
instr.flow_control.dest_offset + instr.flow_control.num_instructions);
}
break;
case Instruction::OpCode::IFC:
{
// TODO: Do we need to consider swizzlers here?
auto flow_control = instr.flow_control;
bool results[3] = { flow_control.refx == state.conditional_code[0],
flow_control.refy == state.conditional_code[1] };
switch (flow_control.op) {
case flow_control.Or:
results[2] = results[0] || results[1];
break;
case flow_control.And:
results[2] = results[0] && results[1];
break;
case flow_control.JustX:
results[2] = results[0];
break;
case flow_control.JustY:
results[2] = results[1];
break;
}
if (results[2]) {
call(state,
binary_offset + 1,
instr.flow_control.dest_offset - binary_offset - 1,
instr.flow_control.dest_offset + instr.flow_control.num_instructions);
} else {
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
instr.flow_control.dest_offset + instr.flow_control.num_instructions);
}
break;
}
default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
break;
}
break; break;
} }
if (increment_pc)
++state.program_counter; ++state.program_counter;
if (exit_loop) if (exit_loop)
@ -275,13 +481,11 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
} }
state.status_registers[0] = false; state.conditional_code[0] = false;
state.status_registers[1] = false; state.conditional_code[1] = false;
boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS);
state.call_stack_pointer = &state.call_stack[0];
ProcessShaderCode(state); ProcessShaderCode(state);
DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
state.debug.max_opdesc_id, registers.vs_main_offset, state.debug.max_opdesc_id, registers.vs_main_offset,
registers.vs_output_attributes); registers.vs_output_attributes);

View file

@ -27,15 +27,18 @@ struct OutputVertex {
Math::Vec4<float24> dummy; // quaternions (not implemented, yet) Math::Vec4<float24> dummy; // quaternions (not implemented, yet)
Math::Vec4<float24> color; Math::Vec4<float24> color;
Math::Vec2<float24> tc0; Math::Vec2<float24> tc0;
Math::Vec2<float24> tc1;
float24 pad[6];
Math::Vec2<float24> tc2;
// Padding for optimal alignment // Padding for optimal alignment
float24 pad[14]; float24 pad2[4];
// Attributes used to store intermediate results // Attributes used to store intermediate results
// position after perspective divide // position after perspective divide
Math::Vec3<float24> screenpos; Math::Vec3<float24> screenpos;
float24 pad2; float24 pad3;
// Linear interpolation // Linear interpolation
// factor: 0=this, 1=vtx // factor: 0=this, 1=vtx
@ -44,6 +47,8 @@ struct OutputVertex {
// TODO: Should perform perspective correct interpolation here... // TODO: Should perform perspective correct interpolation here...
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
@ -61,222 +66,16 @@ struct OutputVertex {
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
union Instruction {
enum class OpCode : u32 {
ADD = 0x0,
DP3 = 0x1,
DP4 = 0x2,
MUL = 0x8,
MAX = 0xC,
MIN = 0xD,
RCP = 0xE,
RSQ = 0xF,
MOV = 0x13,
RET = 0x21,
FLS = 0x22, // Flush
CALL = 0x24,
};
std::string GetOpCodeName() const {
std::map<OpCode, std::string> map = {
{ OpCode::ADD, "ADD" },
{ OpCode::DP3, "DP3" },
{ OpCode::DP4, "DP4" },
{ OpCode::MUL, "MUL" },
{ OpCode::MAX, "MAX" },
{ OpCode::MIN, "MIN" },
{ OpCode::RCP, "RCP" },
{ OpCode::RSQ, "RSQ" },
{ OpCode::MOV, "MOV" },
{ OpCode::RET, "RET" },
{ OpCode::FLS, "FLS" },
};
auto it = map.find(opcode);
if (it == map.end())
return "UNK";
else
return it->second;
}
u32 hex;
BitField<0x1a, 0x6, OpCode> opcode;
// General notes:
//
// When two input registers are used, one of them uses a 5-bit index while the other
// one uses a 7-bit index. This is because at most one floating point uniform may be used
// as an input.
// Format used e.g. by arithmetic instructions and comparisons
// "src1" and "src2" specify register indices (i.e. indices referring to groups of 4 floats),
// while "dest" addresses individual floats.
union {
BitField<0x00, 0x5, u32> operand_desc_id;
template<class BitFieldType>
struct SourceRegister : BitFieldType {
enum RegisterType {
Input,
Temporary,
FloatUniform
};
RegisterType GetRegisterType() const {
if (BitFieldType::Value() < 0x10)
return Input;
else if (BitFieldType::Value() < 0x20)
return Temporary;
else
return FloatUniform;
}
int GetIndex() const {
if (GetRegisterType() == Input)
return BitFieldType::Value();
else if (GetRegisterType() == Temporary)
return BitFieldType::Value() - 0x10;
else // if (GetRegisterType() == FloatUniform)
return BitFieldType::Value() - 0x20;
}
std::string GetRegisterName() const {
std::map<RegisterType, std::string> type = {
{ Input, "i" },
{ Temporary, "t" },
{ FloatUniform, "f" },
};
return type[GetRegisterType()] + std::to_string(GetIndex());
}
};
SourceRegister<BitField<0x07, 0x5, u32>> src2;
SourceRegister<BitField<0x0c, 0x7, u32>> src1;
struct : BitField<0x15, 0x5, u32>
{
enum RegisterType {
Output,
Temporary,
Unknown
};
RegisterType GetRegisterType() const {
if (Value() < 0x8)
return Output;
else if (Value() < 0x10)
return Unknown;
else
return Temporary;
}
int GetIndex() const {
if (GetRegisterType() == Output)
return Value();
else if (GetRegisterType() == Temporary)
return Value() - 0x10;
else
return Value();
}
std::string GetRegisterName() const {
std::map<RegisterType, std::string> type = {
{ Output, "o" },
{ Temporary, "t" },
{ Unknown, "u" }
};
return type[GetRegisterType()] + std::to_string(GetIndex());
}
} dest;
} common;
// Format used for flow control instructions ("if")
union {
BitField<0x00, 0x8, u32> num_instructions;
BitField<0x0a, 0xc, u32> offset_words;
} flow_control;
};
static_assert(std::is_standard_layout<Instruction>::value, "Structure is not using standard layout!");
union SwizzlePattern {
u32 hex;
enum class Selector : u32 {
x = 0,
y = 1,
z = 2,
w = 3
};
Selector GetSelectorSrc1(int comp) const {
Selector selectors[] = {
src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3
};
return selectors[comp];
}
Selector GetSelectorSrc2(int comp) const {
Selector selectors[] = {
src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3
};
return selectors[comp];
}
bool DestComponentEnabled(int i) const {
return (dest_mask & (0x8 >> i)) != 0;
}
std::string SelectorToString(bool src2) const {
std::map<Selector, std::string> map = {
{ Selector::x, "x" },
{ Selector::y, "y" },
{ Selector::z, "z" },
{ Selector::w, "w" }
};
std::string ret;
for (int i = 0; i < 4; ++i) {
ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i));
}
return ret;
}
std::string DestMaskToString() const {
std::string ret;
for (int i = 0; i < 4; ++i) {
if (!DestComponentEnabled(i))
ret += "_";
else
ret += "xyzw"[i];
}
return ret;
}
// Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x
BitField< 0, 4, u32> dest_mask;
BitField< 4, 1, u32> negate; // negates src1
BitField< 5, 2, Selector> src1_selector_3;
BitField< 7, 2, Selector> src1_selector_2;
BitField< 9, 2, Selector> src1_selector_1;
BitField<11, 2, Selector> src1_selector_0;
BitField<14, 2, Selector> src2_selector_3;
BitField<16, 2, Selector> src2_selector_2;
BitField<18, 2, Selector> src2_selector_1;
BitField<20, 2, Selector> src2_selector_0;
BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign?
};
void SubmitShaderMemoryChange(u32 addr, u32 value); void SubmitShaderMemoryChange(u32 addr, u32 value);
void SubmitSwizzleDataChange(u32 addr, u32 value); void SubmitSwizzleDataChange(u32 addr, u32 value);
OutputVertex RunShader(const InputVertex& input, int num_attributes); OutputVertex RunShader(const InputVertex& input, int num_attributes);
Math::Vec4<float24>& GetFloatUniform(u32 index); Math::Vec4<float24>& GetFloatUniform(u32 index);
bool& GetBoolUniform(u32 index);
const std::array<u32, 1024>& GetShaderBinary();
const std::array<u32, 1024>& GetSwizzlePatterns();
} // namespace } // namespace