citra/src/video_core/shader/shader.h

// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <cstddef>
#include <memory>
#include <type_traits>
#include <vector>

#include <boost/container/static_vector.hpp>

#include <nihstro/shader_bytecode.h>

#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"

#include "video_core/pica.h"
#include "video_core/pica_types.h"

using nihstro::RegisterType;
using nihstro::SourceRegister;
using nihstro::DestRegister;

namespace Pica {

namespace Shader {

struct InputVertex {
    alignas(16) Math::Vec4<float24> attr[16];
};

struct OutputVertex {
    OutputVertex() = default;

    // VS output attributes
    Math::Vec4<float24> pos;
    Math::Vec4<float24> quat;
    Math::Vec4<float24> color;
    Math::Vec2<float24> tc0;
    Math::Vec2<float24> tc1;
    float24 tc0_w;
    INSERT_PADDING_WORDS(1);
    Math::Vec3<float24> view;
    INSERT_PADDING_WORDS(1);
    Math::Vec2<float24> tc2;

    // Padding for optimal alignment
    INSERT_PADDING_WORDS(4);

    // Attributes used to store intermediate results

    // position after perspective divide
    Math::Vec3<float24> screenpos;
    INSERT_PADDING_WORDS(1);

    // Linear interpolation
    // factor: 0=this, 1=vtx
    void Lerp(float24 factor, const OutputVertex& vtx) {
        pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);

        // TODO: Should perform perspective correct interpolation here...
        tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
        tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
        tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);

        screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);

        color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
    }

    // Linear interpolation
    // factor: 0=v0, 1=v1
    static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
        OutputVertex ret = v0;
        ret.Lerp(factor, v1);
        return ret;
    }
};
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");

// Helper structure used to keep track of data useful for inspection of shader emulation
template<bool full_debugging>
struct DebugData;

template<>
struct DebugData<false> {
    // TODO: Hide these behind and interface and move them to DebugData<true>
    u32 max_offset; // maximum program counter ever reached
    u32 max_opdesc_id; // maximum swizzle pattern index ever used
};

template<>
struct DebugData<true> {
    // Records store the input and output operands of a particular instruction.
    struct Record {
        enum Type {
            // Floating point arithmetic operands
            SRC1         = 0x1,
            SRC2         = 0x2,
            SRC3         = 0x4,

            // Initial and final output operand value
            DEST_IN      = 0x8,
            DEST_OUT     = 0x10,

            // Current and next instruction offset (in words)
            CUR_INSTR    = 0x20,
            NEXT_INSTR   = 0x40,

            // Output address register value
            ADDR_REG_OUT = 0x80,

            // Result of a comparison instruction
            CMP_RESULT   = 0x100,

            // Input values for conditional flow control instructions
            COND_BOOL_IN = 0x200,
            COND_CMP_IN  = 0x400,

            // Input values for a loop
            LOOP_INT_IN  = 0x800,
        };

        Math::Vec4<float24> src1;
        Math::Vec4<float24> src2;
        Math::Vec4<float24> src3;

        Math::Vec4<float24> dest_in;
        Math::Vec4<float24> dest_out;

        s32 address_registers[2];
        bool conditional_code[2];
        bool cond_bool;
        bool cond_cmp[2];
        Math::Vec4<u8> loop_int;

        u32 instruction_offset;
        u32 next_instruction;

        // set of enabled fields (as a combination of Type flags)
        unsigned mask = 0;
    };

    u32 max_offset; // maximum program counter ever reached
    u32 max_opdesc_id; // maximum swizzle pattern index ever used

    // List of records for each executed shader instruction
    std::vector<DebugData<true>::Record> records;
};

// Type alias for better readability
using DebugDataRecord = DebugData<true>::Record;

// Helper function to set a DebugData<true>::Record field based on the template enum parameter.
template<DebugDataRecord::Type type, typename ValueType>
inline void SetField(DebugDataRecord& record, ValueType value);

template<>
inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {
    record.src1.x = value[0];
    record.src1.y = value[1];
    record.src1.z = value[2];
    record.src1.w = value[3];
}

template<>
inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {
    record.src2.x = value[0];
    record.src2.y = value[1];
    record.src2.z = value[2];
    record.src2.w = value[3];
}

template<>
inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {
    record.src3.x = value[0];
    record.src3.y = value[1];
    record.src3.z = value[2];
    record.src3.w = value[3];
}

template<>
inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {
    record.dest_in.x = value[0];
    record.dest_in.y = value[1];
    record.dest_in.z = value[2];
    record.dest_in.w = value[3];
}

template<>
inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {
    record.dest_out.x = value[0];
    record.dest_out.y = value[1];
    record.dest_out.z = value[2];
    record.dest_out.w = value[3];
}

template<>
inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {
    record.address_registers[0] = value[0];
    record.address_registers[1] = value[1];
}

template<>
inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {
    record.conditional_code[0] = value[0];
    record.conditional_code[1] = value[1];
}

template<>
inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {
    record.cond_bool = value;
}

template<>
inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {
    record.cond_cmp[0] = value[0];
    record.cond_cmp[1] = value[1];
}

template<>
inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {
    record.loop_int = value;
}

template<>
inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {
    record.instruction_offset = value;
}

template<>
inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {
    record.next_instruction = value;
}

// Helper function to set debug information on the current shader iteration.
template<DebugDataRecord::Type type, typename ValueType>
inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {
    // Debugging disabled => nothing to do
}

template<DebugDataRecord::Type type, typename ValueType>
inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {
    if (offset >= debug_data.records.size())
        debug_data.records.resize(offset + 1);

   SetField<type, ValueType>(debug_data.records[offset], value);
   debug_data.records[offset].mask |= type;
}


/**
 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
 * has four shader units that process shaders in parallel. At the present, Citra only implements a
 * single shader unit that processes all shaders serially. Putting the state information in a struct
 * here will make it easier for us to parallelize the shader processing later.
 */
template<bool Debug>
struct UnitState {
    struct Registers {
        // The registers are accessed by the shader JIT using SSE instructions, and are therefore
        // required to be 16-byte aligned.
        alignas(16) Math::Vec4<float24> input[16];
        alignas(16) Math::Vec4<float24> output[16];
        alignas(16) Math::Vec4<float24> temporary[16];
    } registers;
    static_assert(std::is_pod<Registers>::value, "Structure is not POD");

    bool conditional_code[2];

    // Two Address registers and one loop counter
    // TODO: How many bits do these actually have?
    s32 address_registers[3];

    DebugData<Debug> debug;

    static size_t InputOffset(const SourceRegister& reg) {
        switch (reg.GetRegisterType()) {
        case RegisterType::Input:
            return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);

        case RegisterType::Temporary:
            return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);

        default:
            UNREACHABLE();
            return 0;
        }
    }

    static size_t OutputOffset(const DestRegister& reg) {
        switch (reg.GetRegisterType()) {
        case RegisterType::Output:
            return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);

        case RegisterType::Temporary:
            return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);

        default:
            UNREACHABLE();
            return 0;
        }
    }
};

/// Clears the shader cache
void ClearCache();

struct ShaderSetup {

    struct {
        // The float uniforms are accessed by the shader JIT using SSE instructions, and are
        // therefore required to be 16-byte aligned.
        alignas(16) Math::Vec4<float24> f[96];

        std::array<bool, 16> b;
        std::array<Math::Vec4<u8>, 4> i;
    } uniforms;

    std::array<u32, 1024> program_code;
    std::array<u32, 1024> swizzle_data;

    /**
     * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
     * vertex, which would happen within the `Run` function).
     */
    void Setup();

    /**
     * Runs the currently setup shader
     * @param state Shader unit state, must be setup per shader and per shader unit
     * @param input Input vertex into the shader
     * @param num_attributes The number of vertex shader attributes
     * @return The output vertex, after having been processed by the vertex shader
     */
    OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);

    /**
     * Produce debug information based on the given shader and input vertex
     * @param input Input vertex into the shader
     * @param num_attributes The number of vertex shader attributes
     * @param config Configuration object for the shader pipeline
     * @param setup Setup object for the shader pipeline
     * @return Debug information for this shader with regards to the given vertex
     */
    DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);

};

} // namespace Shader

} // namespace Pica
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`// Copyright 2015 Citra Emulator Project`
			`// Licensed under GPLv2 or any later version`
			`// Refer to the license.txt file included.`

			`#pragma once`

VideoCore: Run include-what-you-use and fix most includes. 2016-04-30 16:34:51 +01:00			`#include <array>`
			`#include <cstddef>`
			`#include <memory>`
			`#include <type_traits>`
Introduce a shader tracer to allow inspection of input/output values for each processed instruction. 2015-07-12 00:57:59 +01:00			`#include <vector>`

Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`#include <boost/container/static_vector.hpp>`
Introduce a shader tracer to allow inspection of input/output values for each processed instruction. 2015-07-12 00:57:59 +01:00
VideoCore: Run include-what-you-use and fix most includes. 2016-04-30 16:34:51 +01:00			`#include <nihstro/shader_bytecode.h>`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
VideoCore: Run include-what-you-use and fix most includes. 2016-04-30 16:34:51 +01:00			`#include "common/assert.h"`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`#include "common/common_funcs.h"`
			`#include "common/common_types.h"`
			`#include "common/vector_math.h"`

			`#include "video_core/pica.h"`
VideoCore: Run include-what-you-use and fix most includes. 2016-04-30 16:34:51 +01:00			`#include "video_core/pica_types.h"`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`using nihstro::RegisterType;`
			`using nihstro::SourceRegister;`
			`using nihstro::DestRegister;`

			`namespace Pica {`

			`namespace Shader {`

			`struct InputVertex {`
Refactor: Extract VertexLoader from command_processor.cpp. Preparation for a similar concept to Dolphin or PPSSPP. These can be JIT-ed and cached. 2016-04-28 18:01:47 +01:00			`alignas(16) Math::Vec4<float24> attr[16];`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`};`

			`struct OutputVertex {`
			`OutputVertex() = default;`

			`// VS output attributes`
			`Math::Vec4<float24> pos;`
Build fix for Debug configurations. 2015-08-16 14:12:43 +01:00			`Math::Vec4<float24> quat;`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`Math::Vec4<float24> color;`
			`Math::Vec2<float24> tc0;`
			`Math::Vec2<float24> tc1;`
Pica: Add tc0.w to OutputVertex 2016-04-18 09:51:13 +01:00			`float24 tc0_w;`
			`INSERT_PADDING_WORDS(1);`
pica: Implement decoding of basic fragment lighting components. - Diffuse - Distance attenuation - float16/float20 types - Vertex Shader 'view' output 2015-09-10 03:39:43 +01:00			`Math::Vec3<float24> view;`
			`INSERT_PADDING_WORDS(1);`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`Math::Vec2<float24> tc2;`

			`// Padding for optimal alignment`
pica: Implement decoding of basic fragment lighting components. - Diffuse - Distance attenuation - float16/float20 types - Vertex Shader 'view' output 2015-09-10 03:39:43 +01:00			`INSERT_PADDING_WORDS(4);`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`// Attributes used to store intermediate results`

			`// position after perspective divide`
			`Math::Vec3<float24> screenpos;`
pica: Implement decoding of basic fragment lighting components. - Diffuse - Distance attenuation - float16/float20 types - Vertex Shader 'view' output 2015-09-10 03:39:43 +01:00			`INSERT_PADDING_WORDS(1);`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`// Linear interpolation`
			`// factor: 0=this, 1=vtx`
			`void Lerp(float24 factor, const OutputVertex& vtx) {`
			`pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);`

			`// TODO: Should perform perspective correct interpolation here...`
			`tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);`
			`tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);`
			`tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);`

			`screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);`

			`color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);`
			`}`

			`// Linear interpolation`
			`// factor: 0=v0, 1=v1`
			`static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {`
			`OutputVertex ret = v0;`
			`ret.Lerp(factor, v1);`
			`return ret;`
			`}`
			`};`
			`static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");`
			`static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");`

Introduce a shader tracer to allow inspection of input/output values for each processed instruction. 2015-07-12 00:57:59 +01:00			`// Helper structure used to keep track of data useful for inspection of shader emulation`
			`template<bool full_debugging>`
			`struct DebugData;`

			`template<>`
			`struct DebugData<false> {`
			`// TODO: Hide these behind and interface and move them to DebugData<true>`
			`u32 max_offset; // maximum program counter ever reached`
			`u32 max_opdesc_id; // maximum swizzle pattern index ever used`
			`};`

			`template<>`
			`struct DebugData<true> {`
			`// Records store the input and output operands of a particular instruction.`
			`struct Record {`
			`enum Type {`
			`// Floating point arithmetic operands`
			`SRC1 = 0x1,`
			`SRC2 = 0x2,`
			`SRC3 = 0x4,`

			`// Initial and final output operand value`
			`DEST_IN = 0x8,`
			`DEST_OUT = 0x10,`

			`// Current and next instruction offset (in words)`
			`CUR_INSTR = 0x20,`
			`NEXT_INSTR = 0x40,`

			`// Output address register value`
			`ADDR_REG_OUT = 0x80,`

			`// Result of a comparison instruction`
			`CMP_RESULT = 0x100,`

			`// Input values for conditional flow control instructions`
			`COND_BOOL_IN = 0x200,`
			`COND_CMP_IN = 0x400,`

			`// Input values for a loop`
			`LOOP_INT_IN = 0x800,`
			`};`

			`Math::Vec4<float24> src1;`
			`Math::Vec4<float24> src2;`
			`Math::Vec4<float24> src3;`

			`Math::Vec4<float24> dest_in;`
			`Math::Vec4<float24> dest_out;`

			`s32 address_registers[2];`
			`bool conditional_code[2];`
			`bool cond_bool;`
			`bool cond_cmp[2];`
			`Math::Vec4<u8> loop_int;`

			`u32 instruction_offset;`
			`u32 next_instruction;`

			`// set of enabled fields (as a combination of Type flags)`
			`unsigned mask = 0;`
			`};`

			`u32 max_offset; // maximum program counter ever reached`
			`u32 max_opdesc_id; // maximum swizzle pattern index ever used`

			`// List of records for each executed shader instruction`
			`std::vector<DebugData<true>::Record> records;`
			`};`

			`// Type alias for better readability`
			`using DebugDataRecord = DebugData<true>::Record;`

			`// Helper function to set a DebugData<true>::Record field based on the template enum parameter.`
			`template<DebugDataRecord::Type type, typename ValueType>`
			`inline void SetField(DebugDataRecord& record, ValueType value);`

			`template<>`
			`inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) {`
			`record.src1.x = value[0];`
			`record.src1.y = value[1];`
			`record.src1.z = value[2];`
			`record.src1.w = value[3];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) {`
			`record.src2.x = value[0];`
			`record.src2.y = value[1];`
			`record.src2.z = value[2];`
			`record.src2.w = value[3];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) {`
			`record.src3.x = value[0];`
			`record.src3.y = value[1];`
			`record.src3.z = value[2];`
			`record.src3.w = value[3];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) {`
			`record.dest_in.x = value[0];`
			`record.dest_in.y = value[1];`
			`record.dest_in.z = value[2];`
			`record.dest_in.w = value[3];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) {`
			`record.dest_out.x = value[0];`
			`record.dest_out.y = value[1];`
			`record.dest_out.z = value[2];`
			`record.dest_out.w = value[3];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) {`
			`record.address_registers[0] = value[0];`
			`record.address_registers[1] = value[1];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) {`
			`record.conditional_code[0] = value[0];`
			`record.conditional_code[1] = value[1];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) {`
			`record.cond_bool = value;`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) {`
			`record.cond_cmp[0] = value[0];`
			`record.cond_cmp[1] = value[1];`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) {`
			`record.loop_int = value;`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) {`
			`record.instruction_offset = value;`
			`}`

			`template<>`
			`inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) {`
			`record.next_instruction = value;`
			`}`

			`// Helper function to set debug information on the current shader iteration.`
			`template<DebugDataRecord::Type type, typename ValueType>`
			`inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) {`
			`// Debugging disabled => nothing to do`
			`}`

			`template<DebugDataRecord::Type type, typename ValueType>`
			`inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) {`
			`if (offset >= debug_data.records.size())`
			`debug_data.records.resize(offset + 1);`

			`SetField<type, ValueType>(debug_data.records[offset], value);`
			`debug_data.records[offset].mask \|= type;`
			`}`


Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`/**`
			`* This structure contains the state information that needs to be unique for a shader unit. The 3DS`
			`* has four shader units that process shaders in parallel. At the present, Citra only implements a`
			`* single shader unit that processes all shaders serially. Putting the state information in a struct`
			`* here will make it easier for us to parallelize the shader processing later.`
			`*/`
Introduce a shader tracer to allow inspection of input/output values for each processed instruction. 2015-07-12 00:57:59 +01:00			`template<bool Debug>`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`struct UnitState {`
Shader: Use a POD struct for registers. 2015-08-15 21:51:32 +01:00			`struct Registers {`
			`// The registers are accessed by the shader JIT using SSE instructions, and are therefore`
			`// required to be 16-byte aligned.`
Common: Get rid of alignment macros The gl rasterizer already uses alignas, so we may as well move everything over. 2016-03-09 06:28:26 +00:00			`alignas(16) Math::Vec4<float24> input[16];`
			`alignas(16) Math::Vec4<float24> output[16];`
			`alignas(16) Math::Vec4<float24> temporary[16];`
Shader: Use a POD struct for registers. 2015-08-15 21:51:32 +01:00			`} registers;`
			`static_assert(std::is_pod<Registers>::value, "Structure is not POD");`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`bool conditional_code[2];`

			`// Two Address registers and one loop counter`
			`// TODO: How many bits do these actually have?`
			`s32 address_registers[3];`

Introduce a shader tracer to allow inspection of input/output values for each processed instruction. 2015-07-12 00:57:59 +01:00			`DebugData<Debug> debug;`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
Shader: Fix size_t to int casts of register offsets 2015-09-07 06:49:57 +01:00			`static size_t InputOffset(const SourceRegister& reg) {`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`switch (reg.GetRegisterType()) {`
			`case RegisterType::Input:`
Shader: Fix size_t to int casts of register offsets 2015-09-07 06:49:57 +01:00			`return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`case RegisterType::Temporary:`
Shader: Fix size_t to int casts of register offsets 2015-09-07 06:49:57 +01:00			`return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`default:`
			`UNREACHABLE();`
			`return 0;`
			`}`
			`}`

Shader: Fix size_t to int casts of register offsets 2015-09-07 06:49:57 +01:00			`static size_t OutputOffset(const DestRegister& reg) {`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00			`switch (reg.GetRegisterType()) {`
			`case RegisterType::Output:`
Shader: Fix size_t to int casts of register offsets 2015-09-07 06:49:57 +01:00			`return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`case RegisterType::Temporary:`
Shader: Fix size_t to int casts of register offsets 2015-09-07 06:49:57 +01:00			`return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`default:`
			`UNREACHABLE();`
			`return 0;`
			`}`
			`}`
			`};`

Turn ShaderSetup into struct 2016-03-30 01:45:18 +01:00			`/// Clears the shader cache`
			`void ClearCache();`

			`struct ShaderSetup {`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
Turn ShaderSetup into struct 2016-03-30 01:45:18 +01:00			`struct {`
			`// The float uniforms are accessed by the shader JIT using SSE instructions, and are`
			`// therefore required to be 16-byte aligned.`
			`alignas(16) Math::Vec4<float24> f[96];`
Shader: Initial implementation of x86_x64 JIT compiler for Pica vertex shaders. - Config: Add an option for selecting to use shader JIT or interpreter. - Qt: Add a menu option for enabling/disabling the shader JIT. 2015-07-23 04:25:30 +01:00
Turn ShaderSetup into struct 2016-03-30 01:45:18 +01:00			`std::array<bool, 16> b;`
			`std::array<Math::Vec4<u8>, 4> i;`
			`} uniforms;`
Introduce a shader tracer to allow inspection of input/output values for each processed instruction. 2015-07-12 00:57:59 +01:00
Turn ShaderSetup into struct 2016-03-30 01:45:18 +01:00			`std::array<u32, 1024> program_code;`
			`std::array<u32, 1024> swizzle_data;`

			`/**`
			`* Performs any shader unit setup that only needs to happen once per shader (as opposed to once per`
			* vertex, which would happen within the `Run` function).
			`*/`
			`void Setup();`

			`/**`
			`* Runs the currently setup shader`
			`* @param state Shader unit state, must be setup per shader and per shader unit`
			`* @param input Input vertex into the shader`
			`* @param num_attributes The number of vertex shader attributes`
			`* @return The output vertex, after having been processed by the vertex shader`
			`*/`
			`OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);`

			`/**`
			`* Produce debug information based on the given shader and input vertex`
			`* @param input Input vertex into the shader`
			`* @param num_attributes The number of vertex shader attributes`
			`* @param config Configuration object for the shader pipeline`
			`* @param setup Setup object for the shader pipeline`
			`* @return Debug information for this shader with regards to the given vertex`
			`*/`
			`DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);`

			`};`
Shader: Define a common interface for running vertex shader programs. 2015-07-22 00:38:59 +01:00
			`} // namespace Shader`

			`} // namespace Pica`