// Copyright 2014 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once #include #include #include #include #include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/vector_math.h" #include "common/logging/log.h" #include "pica_types.h" namespace Pica { // Returns index corresponding to the Regs member labeled by field_name // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions // when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). // For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members // Hopefully, this will be fixed sometime in the future. // For lack of better alternatives, we currently hardcode the offsets when constant // expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts // will then make sure the offsets indeed match the automatically calculated ones). #define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32)) #if defined(_MSC_VER) #define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index) #else // NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler // really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX // and then performs a (no-op) cast to size_t iff the second argument matches the expected // field offset. Otherwise, the compiler will fail to compile this code. #define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ ((typename std::enable_if::type)PICA_REG_INDEX(field_name)) #endif // _MSC_VER struct Regs { INSERT_PADDING_WORDS(0x10); u32 trigger_irq; INSERT_PADDING_WORDS(0x2f); enum class CullMode : u32 { // Select which polygons are considered to be "frontfacing". KeepAll = 0, KeepClockWise = 1, KeepCounterClockWise = 2, // TODO: What does the third value imply? }; union { BitField<0, 2, CullMode> cull_mode; }; BitField<0, 24, u32> viewport_size_x; INSERT_PADDING_WORDS(0x1); BitField<0, 24, u32> viewport_size_y; INSERT_PADDING_WORDS(0x9); BitField<0, 24, u32> viewport_depth_range; // float24 BitField<0, 24, u32> viewport_depth_far_plane; // float24 INSERT_PADDING_WORDS(0x1); union VSOutputAttributes { // Maps components of output vertex attributes to semantics enum Semantic : u32 { POSITION_X = 0, POSITION_Y = 1, POSITION_Z = 2, POSITION_W = 3, QUATERNION_X = 4, QUATERNION_Y = 5, QUATERNION_Z = 6, QUATERNION_W = 7, COLOR_R = 8, COLOR_G = 9, COLOR_B = 10, COLOR_A = 11, TEXCOORD0_U = 12, TEXCOORD0_V = 13, TEXCOORD1_U = 14, TEXCOORD1_V = 15, // TODO: Not verified VIEW_X = 18, VIEW_Y = 19, VIEW_Z = 20, TEXCOORD2_U = 22, TEXCOORD2_V = 23, INVALID = 31, }; BitField< 0, 5, Semantic> map_x; BitField< 8, 5, Semantic> map_y; BitField<16, 5, Semantic> map_z; BitField<24, 5, Semantic> map_w; } vs_output_attributes[7]; INSERT_PADDING_WORDS(0x11); union { BitField< 0, 16, u32> x; BitField<16, 16, u32> y; } viewport_corner; INSERT_PADDING_WORDS(0x17); struct TextureConfig { enum WrapMode : u32 { ClampToEdge = 0, ClampToBorder = 1, Repeat = 2, MirroredRepeat = 3, }; enum TextureFilter : u32 { Nearest = 0, Linear = 1 }; union { u32 raw; BitField< 0, 8, u32> r; BitField< 8, 8, u32> g; BitField<16, 8, u32> b; BitField<24, 8, u32> a; } border_color; union { BitField< 0, 16, u32> height; BitField<16, 16, u32> width; }; union { BitField< 1, 1, TextureFilter> mag_filter; BitField< 2, 1, TextureFilter> min_filter; BitField< 8, 2, WrapMode> wrap_t; BitField<12, 2, WrapMode> wrap_s; }; INSERT_PADDING_WORDS(0x1); u32 address; u32 GetPhysicalAddress() const { return DecodeAddressRegister(address); } // texture1 and texture2 store the texture format directly after the address // whereas texture0 inserts some additional flags inbetween. // Hence, we store the format separately so that all other parameters can be described // in a single structure. }; enum class TextureFormat : u32 { RGBA8 = 0, RGB8 = 1, RGB5A1 = 2, RGB565 = 3, RGBA4 = 4, IA8 = 5, RG8 = 6, ///< @note Also called HILO8 in 3DBrew. I8 = 7, A8 = 8, IA4 = 9, I4 = 10, A4 = 11, ETC1 = 12, // compressed ETC1A4 = 13, // compressed }; enum class LogicOp : u32 { Clear = 0, And = 1, AndReverse = 2, Copy = 3, Set = 4, CopyInverted = 5, NoOp = 6, Invert = 7, Nand = 8, Or = 9, Nor = 10, Xor = 11, Equiv = 12, AndInverted = 13, OrReverse = 14, OrInverted = 15, }; static unsigned NibblesPerPixel(TextureFormat format) { switch (format) { case TextureFormat::RGBA8: return 8; case TextureFormat::RGB8: return 6; case TextureFormat::RGB5A1: case TextureFormat::RGB565: case TextureFormat::RGBA4: case TextureFormat::IA8: case TextureFormat::RG8: return 4; case TextureFormat::I4: case TextureFormat::A4: return 1; case TextureFormat::I8: case TextureFormat::A8: case TextureFormat::IA4: default: // placeholder for yet unknown formats return 2; } } union { BitField< 0, 1, u32> texture0_enable; BitField< 1, 1, u32> texture1_enable; BitField< 2, 1, u32> texture2_enable; }; TextureConfig texture0; INSERT_PADDING_WORDS(0x8); BitField<0, 4, TextureFormat> texture0_format; BitField<0, 1, u32> fragment_lighting_enable; INSERT_PADDING_WORDS(0x1); TextureConfig texture1; BitField<0, 4, TextureFormat> texture1_format; INSERT_PADDING_WORDS(0x2); TextureConfig texture2; BitField<0, 4, TextureFormat> texture2_format; INSERT_PADDING_WORDS(0x21); struct FullTextureConfig { const bool enabled; const TextureConfig config; const TextureFormat format; }; const std::array GetTextures() const { return {{ { texture0_enable.ToBool(), texture0, texture0_format }, { texture1_enable.ToBool(), texture1, texture1_format }, { texture2_enable.ToBool(), texture2, texture2_format } }}; } // 0xc0-0xff: Texture Combiner (akin to glTexEnv) struct TevStageConfig { enum class Source : u32 { PrimaryColor = 0x0, PrimaryFragmentColor = 0x1, SecondaryFragmentColor = 0x2, Texture0 = 0x3, Texture1 = 0x4, Texture2 = 0x5, Texture3 = 0x6, PreviousBuffer = 0xd, Constant = 0xe, Previous = 0xf, }; enum class ColorModifier : u32 { SourceColor = 0x0, OneMinusSourceColor = 0x1, SourceAlpha = 0x2, OneMinusSourceAlpha = 0x3, SourceRed = 0x4, OneMinusSourceRed = 0x5, SourceGreen = 0x8, OneMinusSourceGreen = 0x9, SourceBlue = 0xc, OneMinusSourceBlue = 0xd, }; enum class AlphaModifier : u32 { SourceAlpha = 0x0, OneMinusSourceAlpha = 0x1, SourceRed = 0x2, OneMinusSourceRed = 0x3, SourceGreen = 0x4, OneMinusSourceGreen = 0x5, SourceBlue = 0x6, OneMinusSourceBlue = 0x7, }; enum class Operation : u32 { Replace = 0, Modulate = 1, Add = 2, AddSigned = 3, Lerp = 4, Subtract = 5, Dot3_RGB = 6, MultiplyThenAdd = 8, AddThenMultiply = 9, }; union { u32 sources_raw; BitField< 0, 4, Source> color_source1; BitField< 4, 4, Source> color_source2; BitField< 8, 4, Source> color_source3; BitField<16, 4, Source> alpha_source1; BitField<20, 4, Source> alpha_source2; BitField<24, 4, Source> alpha_source3; }; union { u32 modifiers_raw; BitField< 0, 4, ColorModifier> color_modifier1; BitField< 4, 4, ColorModifier> color_modifier2; BitField< 8, 4, ColorModifier> color_modifier3; BitField<12, 3, AlphaModifier> alpha_modifier1; BitField<16, 3, AlphaModifier> alpha_modifier2; BitField<20, 3, AlphaModifier> alpha_modifier3; }; union { u32 ops_raw; BitField< 0, 4, Operation> color_op; BitField<16, 4, Operation> alpha_op; }; union { u32 const_color; BitField< 0, 8, u32> const_r; BitField< 8, 8, u32> const_g; BitField<16, 8, u32> const_b; BitField<24, 8, u32> const_a; }; union { u32 scales_raw; BitField< 0, 2, u32> color_scale; BitField<16, 2, u32> alpha_scale; }; inline unsigned GetColorMultiplier() const { return (color_scale < 3) ? (1 << color_scale) : 1; } inline unsigned GetAlphaMultiplier() const { return (alpha_scale < 3) ? (1 << alpha_scale) : 1; } }; TevStageConfig tev_stage0; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage1; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage2; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage3; INSERT_PADDING_WORDS(0x3); union { // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in // these masks are set BitField< 8, 4, u32> update_mask_rgb; BitField<12, 4, u32> update_mask_a; bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); } bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { return (stage_index < 4) && (update_mask_a & (1 << stage_index)); } } tev_combiner_buffer_input; INSERT_PADDING_WORDS(0xf); TevStageConfig tev_stage4; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage5; union { u32 raw; BitField< 0, 8, u32> r; BitField< 8, 8, u32> g; BitField<16, 8, u32> b; BitField<24, 8, u32> a; } tev_combiner_buffer_color; INSERT_PADDING_WORDS(0x2); const std::array GetTevStages() const { return {{ tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5 }}; }; enum class BlendEquation : u32 { Add = 0, Subtract = 1, ReverseSubtract = 2, Min = 3, Max = 4, }; enum class BlendFactor : u32 { Zero = 0, One = 1, SourceColor = 2, OneMinusSourceColor = 3, DestColor = 4, OneMinusDestColor = 5, SourceAlpha = 6, OneMinusSourceAlpha = 7, DestAlpha = 8, OneMinusDestAlpha = 9, ConstantColor = 10, OneMinusConstantColor = 11, ConstantAlpha = 12, OneMinusConstantAlpha = 13, SourceAlphaSaturate = 14, }; enum class CompareFunc : u32 { Never = 0, Always = 1, Equal = 2, NotEqual = 3, LessThan = 4, LessThanOrEqual = 5, GreaterThan = 6, GreaterThanOrEqual = 7, }; enum class StencilAction : u32 { Keep = 0, Zero = 1, Replace = 2, Increment = 3, Decrement = 4, Invert = 5, IncrementWrap = 6, DecrementWrap = 7 }; struct { union { // If false, logic blending is used BitField<8, 1, u32> alphablend_enable; }; union { BitField< 0, 8, BlendEquation> blend_equation_rgb; BitField< 8, 8, BlendEquation> blend_equation_a; BitField<16, 4, BlendFactor> factor_source_rgb; BitField<20, 4, BlendFactor> factor_dest_rgb; BitField<24, 4, BlendFactor> factor_source_a; BitField<28, 4, BlendFactor> factor_dest_a; } alpha_blending; union { BitField<0, 4, LogicOp> logic_op; }; union { u32 raw; BitField< 0, 8, u32> r; BitField< 8, 8, u32> g; BitField<16, 8, u32> b; BitField<24, 8, u32> a; } blend_const; union { BitField< 0, 1, u32> enable; BitField< 4, 3, CompareFunc> func; BitField< 8, 8, u32> ref; } alpha_test; struct { union { // Raw value of this register u32 raw_func; // If true, enable stencil testing BitField< 0, 1, u32> enable; // Comparison operation for stencil testing BitField< 4, 3, CompareFunc> func; // Mask used to control writing to the stencil buffer BitField< 8, 8, u32> write_mask; // Value to compare against for stencil testing BitField<16, 8, u32> reference_value; // Mask to apply on stencil test inputs BitField<24, 8, u32> input_mask; }; union { // Raw value of this register u32 raw_op; // Action to perform when the stencil test fails BitField< 0, 3, StencilAction> action_stencil_fail; // Action to perform when stencil testing passed but depth testing fails BitField< 4, 3, StencilAction> action_depth_fail; // Action to perform when both stencil and depth testing pass BitField< 8, 3, StencilAction> action_depth_pass; }; } stencil_test; union { BitField< 0, 1, u32> depth_test_enable; BitField< 4, 3, CompareFunc> depth_test_func; BitField< 8, 1, u32> red_enable; BitField< 9, 1, u32> green_enable; BitField<10, 1, u32> blue_enable; BitField<11, 1, u32> alpha_enable; BitField<12, 1, u32> depth_write_enable; }; INSERT_PADDING_WORDS(0x8); } output_merger; // Components are laid out in reverse byte order, most significant bits first. enum class ColorFormat : u32 { RGBA8 = 0, RGB8 = 1, RGB5A1 = 2, RGB565 = 3, RGBA4 = 4, }; enum class DepthFormat : u32 { D16 = 0, D24 = 2, D24S8 = 3, }; // Returns the number of bytes in the specified color format static unsigned BytesPerColorPixel(ColorFormat format) { switch (format) { case ColorFormat::RGBA8: return 4; case ColorFormat::RGB8: return 3; case ColorFormat::RGB5A1: case ColorFormat::RGB565: case ColorFormat::RGBA4: return 2; default: LOG_CRITICAL(HW_GPU, "Unknown color format %u", format); UNIMPLEMENTED(); } } struct { INSERT_PADDING_WORDS(0x6); DepthFormat depth_format; // TODO: Should be a BitField! BitField<16, 3, ColorFormat> color_format; INSERT_PADDING_WORDS(0x4); u32 depth_buffer_address; u32 color_buffer_address; union { // Apparently, the framebuffer width is stored as expected, // while the height is stored as the actual height minus one. // Hence, don't access these fields directly but use the accessors // GetWidth() and GetHeight() instead. BitField< 0, 11, u32> width; BitField<12, 10, u32> height; }; INSERT_PADDING_WORDS(0x1); inline u32 GetColorBufferPhysicalAddress() const { return DecodeAddressRegister(color_buffer_address); } inline u32 GetDepthBufferPhysicalAddress() const { return DecodeAddressRegister(depth_buffer_address); } inline u32 GetWidth() const { return width; } inline u32 GetHeight() const { return height + 1; } } framebuffer; // Returns the number of bytes in the specified depth format static u32 BytesPerDepthPixel(DepthFormat format) { switch (format) { case DepthFormat::D16: return 2; case DepthFormat::D24: return 3; case DepthFormat::D24S8: return 4; default: LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); UNIMPLEMENTED(); } } // Returns the number of bits per depth component of the specified depth format static u32 DepthBitsPerPixel(DepthFormat format) { switch (format) { case DepthFormat::D16: return 16; case DepthFormat::D24: case DepthFormat::D24S8: return 24; default: LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); UNIMPLEMENTED(); } } INSERT_PADDING_WORDS(0x20); enum class LightingSampler { Distribution0 = 0, Distribution1 = 1, Fresnel = 3, ReflectBlue = 4, ReflectGreen = 5, ReflectRed = 6, SpotlightAttenuation = 8, DistanceAttenuation = 16, }; /** * Pica fragment lighting supports using different LUTs for each lighting component: * Reflectance R, G, and B channels, distribution function for specular components 0 and 1, * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel * (or whether a channel is enabled at all) is specified by various pre-defined lighting * configurations. With configurations that require more LUTs, more cycles are required on HW to * perform lighting computations. */ enum class LightingConfig { Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight Config1 = 1, ///< Reflect Red, Fresnel, Spotlight Config2 = 2, ///< Reflect Red, Distribution 0/1 Config3 = 3, ///< Distribution 0/1, Fresnel Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration }; /// Selects which lighting components are affected by fresnel enum class LightingFresnelSelector { None = 0, ///< Fresnel is disabled PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel }; /// Factor used to scale the output of a lighting LUT enum class LightingScale { Scale1 = 0, ///< Scale is 1x Scale2 = 1, ///< Scale is 2x Scale4 = 2, ///< Scale is 4x Scale8 = 3, ///< Scale is 8x Scale1_4 = 6, ///< Scale is 0.25x Scale1_2 = 7, ///< Scale is 0.5x }; enum class LightingLutInput { NH = 0, // Cosine of the angle between the normal and half-angle vectors VH = 1, // Cosine of the angle between the view and half-angle vectors NV = 2, // Cosine of the angle between the normal and the view vector LN = 3, // Cosine of the angle between the light and the normal vectors }; enum class LightingBumpMode : u32 { None = 0, NormalMap = 1, TangentMap = 2, }; union LightColor { BitField< 0, 10, u32> b; BitField<10, 10, u32> g; BitField<20, 10, u32> r; Math::Vec3f ToVec3f() const { // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); } }; /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { switch (sampler) { case LightingSampler::Distribution0: return (config != LightingConfig::Config1); case LightingSampler::Distribution1: return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); case LightingSampler::Fresnel: return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); case LightingSampler::ReflectRed: return (config != LightingConfig::Config3); case LightingSampler::ReflectGreen: case LightingSampler::ReflectBlue: return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); } return false; } struct { struct LightSrc { LightColor specular_0; // material.specular_0 * light.specular_0 LightColor specular_1; // material.specular_1 * light.specular_1 LightColor diffuse; // material.diffuse * light.diffuse LightColor ambient; // material.ambient * light.ambient struct { // Encoded as 16-bit floating point union { BitField< 0, 16, u32> x; BitField<16, 16, u32> y; }; union { BitField< 0, 16, u32> z; }; INSERT_PADDING_WORDS(0x3); union { BitField<0, 1, u32> directional; BitField<1, 1, u32> two_sided_diffuse; // 1: GL_TRUE, 0: GL_FALSE; when disabled, clamp dot-product to 0 }; }; BitField<0, 20, u32> dist_atten_bias; BitField<0, 20, u32> dist_atten_scale; INSERT_PADDING_WORDS(0x4); }; static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); LightSrc light[8]; LightColor global_ambient; // emission + (material.ambient * lighting.ambient) INSERT_PADDING_WORDS(0x1); BitField<0, 3, u32> src_num; // number of enabled lights - 1 union { BitField< 2, 2, LightingFresnelSelector> fresnel_selector; BitField< 4, 4, LightingConfig> config; BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE BitField<28, 2, LightingBumpMode> bump_mode; // 1: GL_TRUE, 0: GL_FALSE BitField<30, 1, u32> bump_renorm; // 0: GL_TRUE, 1: GL_FALSE }; union { BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE BitField<19, 1, u32> lut_enable_fr; // 0: GL_TRUE, 1: GL_FALSE BitField<20, 1, u32> lut_enable_rr; // 0: GL_TRUE, 1: GL_FALSE BitField<21, 1, u32> lut_enable_rg; // 0: GL_TRUE, 1: GL_FALSE BitField<22, 1, u32> lut_enable_rb; // 0: GL_TRUE, 1: GL_FALSE // Each bit specifies whether distance attenuation should be applied for the // corresponding light BitField<24, 1, u32> dist_atten_enable_light_0; // 0: GL_TRUE, 1: GL_FALSE BitField<25, 1, u32> dist_atten_enable_light_1; // 0: GL_TRUE, 1: GL_FALSE BitField<26, 1, u32> dist_atten_enable_light_2; // 0: GL_TRUE, 1: GL_FALSE BitField<27, 1, u32> dist_atten_enable_light_3; // 0: GL_TRUE, 1: GL_FALSE BitField<28, 1, u32> dist_atten_enable_light_4; // 0: GL_TRUE, 1: GL_FALSE BitField<29, 1, u32> dist_atten_enable_light_5; // 0: GL_TRUE, 1: GL_FALSE BitField<30, 1, u32> dist_atten_enable_light_6; // 0: GL_TRUE, 1: GL_FALSE BitField<31, 1, u32> dist_atten_enable_light_7; // 0: GL_TRUE, 1: GL_FALSE }; bool IsDistAttenEnabled(unsigned index) const { const unsigned enable[] = { dist_atten_enable_light_0, dist_atten_enable_light_1, dist_atten_enable_light_2, dist_atten_enable_light_3, dist_atten_enable_light_4, dist_atten_enable_light_5, dist_atten_enable_light_6, dist_atten_enable_light_7 }; return enable[index] == 0; } union { BitField<0, 8, u32> index; ///< Index at which to set data in the LUT BitField<8, 5, u32> type; ///< Type of LUT for which to set data } lut_config; BitField<0, 1, u32> disable; INSERT_PADDING_WORDS(0x1); // When data is written to any of these registers, it gets written to the lookup table of // the selected type at the selected index, specified above in the `lut_config` register. // With each write, `lut_config.index` is incremented. It does not matter which of these // registers is written to, the behavior will be the same. u32 lut_data[8]; union { BitField< 1, 1, u32> d0; // 0: GL_TRUE, 1: GL_FALSE BitField< 5, 1, u32> d1; // 0: GL_TRUE, 1: GL_FALSE BitField< 9, 1, u32> sp; // 0: GL_TRUE, 1: GL_FALSE BitField<13, 1, u32> fr; // 0: GL_TRUE, 1: GL_FALSE BitField<17, 1, u32> rb; // 0: GL_TRUE, 1: GL_FALSE BitField<21, 1, u32> rg; // 0: GL_TRUE, 1: GL_FALSE BitField<25, 1, u32> rr; // 0: GL_TRUE, 1: GL_FALSE } abs_lut_input; union { BitField< 0, 3, LightingLutInput> d0; BitField< 4, 3, LightingLutInput> d1; BitField< 8, 3, LightingLutInput> sp; BitField<12, 3, LightingLutInput> fr; BitField<16, 3, LightingLutInput> rb; BitField<20, 3, LightingLutInput> rg; BitField<24, 3, LightingLutInput> rr; } lut_input; union { BitField< 0, 3, LightingScale> d0; BitField< 4, 3, LightingScale> d1; BitField< 8, 3, LightingScale> sp; BitField<12, 3, LightingScale> fr; BitField<16, 3, LightingScale> rb; BitField<20, 3, LightingScale> rg; BitField<24, 3, LightingScale> rr; static float GetScale(LightingScale scale) { switch (scale) { case LightingScale::Scale1: return 1.0f; case LightingScale::Scale2: return 2.0f; case LightingScale::Scale4: return 4.0f; case LightingScale::Scale8: return 8.0f; case LightingScale::Scale1_4: return 0.25f; case LightingScale::Scale1_2: return 0.5f; } return 0.0f; } } lut_scale; INSERT_PADDING_WORDS(0x6); union { // There are 8 light enable "slots", corresponding to the total number of lights // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'src_num' // above), the first N slots below will be set to integers within the range of 0-7, // corresponding to the actual light that is enabled for each slot. BitField< 0, 3, u32> slot_0; BitField< 4, 3, u32> slot_1; BitField< 8, 3, u32> slot_2; BitField<12, 3, u32> slot_3; BitField<16, 3, u32> slot_4; BitField<20, 3, u32> slot_5; BitField<24, 3, u32> slot_6; BitField<28, 3, u32> slot_7; unsigned GetNum(unsigned index) const { const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; return enable_slots[index]; } } light_enable; } lighting; INSERT_PADDING_WORDS(0x26); enum class VertexAttributeFormat : u64 { BYTE = 0, UBYTE = 1, SHORT = 2, FLOAT = 3, }; struct { BitField<0, 29, u32> base_address; u32 GetPhysicalBaseAddress() const { return DecodeAddressRegister(base_address); } // Descriptor for internal vertex attributes union { BitField< 0, 2, VertexAttributeFormat> format0; // size of one element BitField< 2, 2, u64> size0; // number of elements minus 1 BitField< 4, 2, VertexAttributeFormat> format1; BitField< 6, 2, u64> size1; BitField< 8, 2, VertexAttributeFormat> format2; BitField<10, 2, u64> size2; BitField<12, 2, VertexAttributeFormat> format3; BitField<14, 2, u64> size3; BitField<16, 2, VertexAttributeFormat> format4; BitField<18, 2, u64> size4; BitField<20, 2, VertexAttributeFormat> format5; BitField<22, 2, u64> size5; BitField<24, 2, VertexAttributeFormat> format6; BitField<26, 2, u64> size6; BitField<28, 2, VertexAttributeFormat> format7; BitField<30, 2, u64> size7; BitField<32, 2, VertexAttributeFormat> format8; BitField<34, 2, u64> size8; BitField<36, 2, VertexAttributeFormat> format9; BitField<38, 2, u64> size9; BitField<40, 2, VertexAttributeFormat> format10; BitField<42, 2, u64> size10; BitField<44, 2, VertexAttributeFormat> format11; BitField<46, 2, u64> size11; BitField<48, 12, u64> attribute_mask; // number of total attributes minus 1 BitField<60, 4, u64> num_extra_attributes; }; inline VertexAttributeFormat GetFormat(int n) const { VertexAttributeFormat formats[] = { format0, format1, format2, format3, format4, format5, format6, format7, format8, format9, format10, format11 }; return formats[n]; } inline int GetNumElements(int n) const { u64 sizes[] = { size0, size1, size2, size3, size4, size5, size6, size7, size8, size9, size10, size11 }; return (int)sizes[n]+1; } inline int GetElementSizeInBytes(int n) const { return (GetFormat(n) == VertexAttributeFormat::FLOAT) ? 4 : (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1; } inline int GetStride(int n) const { return GetNumElements(n) * GetElementSizeInBytes(n); } inline bool IsDefaultAttribute(int id) const { return (id >= 12) || (attribute_mask & (1ULL << id)) != 0; } inline int GetNumTotalAttributes() const { return (int)num_extra_attributes+1; } // Attribute loaders map the source vertex data to input attributes // This e.g. allows to load different attributes from different memory locations struct { // Source attribute data offset from the base address u32 data_offset; union { BitField< 0, 4, u64> comp0; BitField< 4, 4, u64> comp1; BitField< 8, 4, u64> comp2; BitField<12, 4, u64> comp3; BitField<16, 4, u64> comp4; BitField<20, 4, u64> comp5; BitField<24, 4, u64> comp6; BitField<28, 4, u64> comp7; BitField<32, 4, u64> comp8; BitField<36, 4, u64> comp9; BitField<40, 4, u64> comp10; BitField<44, 4, u64> comp11; // bytes for a single vertex in this loader BitField<48, 8, u64> byte_count; BitField<60, 4, u64> component_count; }; inline int GetComponent(int n) const { u64 components[] = { comp0, comp1, comp2, comp3, comp4, comp5, comp6, comp7, comp8, comp9, comp10, comp11 }; return (int)components[n]; } } attribute_loaders[12]; } vertex_attributes; struct { enum IndexFormat : u32 { BYTE = 0, SHORT = 1, }; union { BitField<0, 31, u32> offset; // relative to base attribute address BitField<31, 1, IndexFormat> format; }; } index_array; // Number of vertices to render u32 num_vertices; INSERT_PADDING_WORDS(0x1); // The index of the first vertex to render u32 vertex_offset; INSERT_PADDING_WORDS(0x3); // These two trigger rendering of triangles u32 trigger_draw; u32 trigger_draw_indexed; INSERT_PADDING_WORDS(0x2); // These registers are used to setup the default "fall-back" vertex shader attributes struct { // Index of the current default attribute u32 index; // Writing to these registers sets the "current" default attribute. u32 set_value[3]; } vs_default_attributes_setup; INSERT_PADDING_WORDS(0x2); struct { // There are two channels that can be used to configure the next command buffer, which // can be then executed by writing to the "trigger" registers. There are two reasons why a // game might use this feature: // 1) With this, an arbitrary number of additional command buffers may be executed in // sequence without requiring any intervention of the CPU after the initial one is // kicked off. // 2) Games can configure these registers to provide a command list subroutine mechanism. BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to unsigned GetSize(unsigned index) const { ASSERT(index < 2); return 8 * size[index]; } PAddr GetPhysicalAddress(unsigned index) const { ASSERT(index < 2); return (PAddr)(8 * addr[index]); } } command_buffer; INSERT_PADDING_WORDS(0x20); enum class TriangleTopology : u32 { List = 0, Strip = 1, Fan = 2, Shader = 3, // Programmable setup unit implemented in a geometry shader }; BitField<8, 2, TriangleTopology> triangle_topology; u32 restart_primitive; INSERT_PADDING_WORDS(0x20); struct ShaderConfig { BitField<0, 16, u32> bool_uniforms; union { BitField< 0, 8, u32> x; BitField< 8, 8, u32> y; BitField<16, 8, u32> z; BitField<24, 8, u32> w; } int_uniforms[4]; INSERT_PADDING_WORDS(0x5); // Offset to shader program entry point (in words) BitField<0, 16, u32> main_offset; union { BitField< 0, 4, u64> attribute0_register; BitField< 4, 4, u64> attribute1_register; BitField< 8, 4, u64> attribute2_register; BitField<12, 4, u64> attribute3_register; BitField<16, 4, u64> attribute4_register; BitField<20, 4, u64> attribute5_register; BitField<24, 4, u64> attribute6_register; BitField<28, 4, u64> attribute7_register; BitField<32, 4, u64> attribute8_register; BitField<36, 4, u64> attribute9_register; BitField<40, 4, u64> attribute10_register; BitField<44, 4, u64> attribute11_register; BitField<48, 4, u64> attribute12_register; BitField<52, 4, u64> attribute13_register; BitField<56, 4, u64> attribute14_register; BitField<60, 4, u64> attribute15_register; int GetRegisterForAttribute(int attribute_index) const { u64 fields[] = { attribute0_register, attribute1_register, attribute2_register, attribute3_register, attribute4_register, attribute5_register, attribute6_register, attribute7_register, attribute8_register, attribute9_register, attribute10_register, attribute11_register, attribute12_register, attribute13_register, attribute14_register, attribute15_register, }; return (int)fields[attribute_index]; } } input_register_map; // OUTMAP_MASK, 0x28E, CODETRANSFER_END INSERT_PADDING_WORDS(0x3); struct { enum Format : u32 { FLOAT24 = 0, FLOAT32 = 1 }; bool IsFloat32() const { return format == FLOAT32; } union { // Index of the next uniform to write to // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices // TODO: Maybe the uppermost index is for the geometry shader? Investigate! BitField<0, 7, u32> index; BitField<31, 1, Format> format; }; // Writing to these registers sets the current uniform. u32 set_value[8]; } uniform_setup; INSERT_PADDING_WORDS(0x2); struct { // Offset of the next instruction to write code to. // Incremented with each instruction write. u32 offset; // Writing to these registers sets the "current" word in the shader program. u32 set_word[8]; } program; INSERT_PADDING_WORDS(0x1); // This register group is used to load an internal table of swizzling patterns, // which are indexed by each shader instruction to specify vector component swizzling. struct { // Offset of the next swizzle pattern to write code to. // Incremented with each instruction write. u32 offset; // Writing to these registers sets the current swizzle pattern in the table. u32 set_word[8]; } swizzle_patterns; INSERT_PADDING_WORDS(0x2); }; ShaderConfig gs; ShaderConfig vs; INSERT_PADDING_WORDS(0x20); // Map register indices to names readable by humans // Used for debugging purposes, so performance is not an issue here static std::string GetCommandName(int index); static inline size_t NumIds() { return sizeof(Regs) / sizeof(u32); } u32& operator [] (int index) const { u32* content = (u32*)this; return content[index]; } u32& operator [] (int index) { u32* content = (u32*)this; return content[index]; } private: /* * Most physical addresses which Pica registers refer to are 8-byte aligned. * This function should be used to get the address from a raw register value. */ static inline u32 DecodeAddressRegister(u32 register_value) { return register_value * 8; } }; // TODO: MSVC does not support using offsetof() on non-static data members even though this // is technically allowed since C++11. This macro should be enabled once MSVC adds // support for that. #ifndef _MSC_VER #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") ASSERT_REG_POSITION(trigger_irq, 0x10); ASSERT_REG_POSITION(cull_mode, 0x40); ASSERT_REG_POSITION(viewport_size_x, 0x41); ASSERT_REG_POSITION(viewport_size_y, 0x43); ASSERT_REG_POSITION(viewport_depth_range, 0x4d); ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f); ASSERT_REG_POSITION(texture1, 0x91); ASSERT_REG_POSITION(texture1_format, 0x96); ASSERT_REG_POSITION(texture2, 0x99); ASSERT_REG_POSITION(texture2_format, 0x9e); ASSERT_REG_POSITION(tev_stage0, 0xc0); ASSERT_REG_POSITION(tev_stage1, 0xc8); ASSERT_REG_POSITION(tev_stage2, 0xd0); ASSERT_REG_POSITION(tev_stage3, 0xd8); ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0); ASSERT_REG_POSITION(tev_stage4, 0xf0); ASSERT_REG_POSITION(tev_stage5, 0xf8); ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(lighting, 0x140); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); ASSERT_REG_POSITION(num_vertices, 0x228); ASSERT_REG_POSITION(vertex_offset, 0x22a); ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); ASSERT_REG_POSITION(command_buffer, 0x238); ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(restart_primitive, 0x25f); ASSERT_REG_POSITION(gs, 0x280); ASSERT_REG_POSITION(vs, 0x2b0); #undef ASSERT_REG_POSITION #endif // !defined(_MSC_VER) static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); /// Struct used to describe current Pica state struct State { /// Pica registers Regs regs; /// Vertex shader memory struct ShaderSetup { struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are // therefore required to be 16-byte aligned. Math::Vec4 MEMORY_ALIGNED16(f[96]); std::array b; std::array, 4> i; } uniforms; Math::Vec4 default_attributes[16]; std::array program_code; std::array swizzle_data; }; ShaderSetup vs; ShaderSetup gs; struct { union LutEntry { // Used for raw access u32 raw; // LUT value, encoded as 12-bit fixed point, with 12 fraction bits BitField< 0, 12, u32> value; // Used by HW for efficient interpolation, Citra does not use these BitField<12, 12, u32> difference; float ToFloat() { return static_cast(value) / 4095.f; } }; std::array, 24> luts; } lighting; /// Current Pica command list struct { const u32* head_ptr; const u32* current_ptr; u32 length; } cmd_list; }; /// Initialize Pica state void Init(); /// Shutdown Pica state void Shutdown(); extern State g_state; ///< Current Pica state } // namespace