mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2024-11-18 12:52:47 +01:00
5b2b6d594c
Games using D3D idioms can join images and samplers when a shader executes, instead of baking them into a combined sampler image. This is also possible on Vulkan. One approach to this solution would be to use separate samplers on Vulkan and leave this unimplemented on OpenGL, but we can't do this because there's no consistent way of determining which constant buffer holds a sampler and which one an image. We could in theory find the first bit and if it's in the TIC area, it's an image; but this falls apart when an image or sampler handle use an index of zero. The used approach is to track for a LOP.OR operation (this is done at an IR level, not at an ISA level), track again the constant buffers used as source and store this pair. Then, outside of shader execution, join the sample and image pair with a bitwise or operation. This approach won't work on games that truly use separate samplers in a meaningful way. For example, pooling textures in a 2D array and determining at runtime what sampler to use. This invalidates OpenGL's disk shader cache :) - Used mostly by D3D ports to Switch
1691 lines
54 KiB
C++
1691 lines
54 KiB
C++
// Copyright 2018 yuzu Emulator Project
|
|
// Licensed under GPLv2 or any later version
|
|
// Refer to the license.txt file included.
|
|
|
|
#pragma once
|
|
|
|
#include <array>
|
|
#include <bitset>
|
|
#include <limits>
|
|
#include <optional>
|
|
#include <type_traits>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include "common/assert.h"
|
|
#include "common/bit_field.h"
|
|
#include "common/common_funcs.h"
|
|
#include "common/common_types.h"
|
|
#include "common/math_util.h"
|
|
#include "video_core/engines/const_buffer_engine_interface.h"
|
|
#include "video_core/engines/const_buffer_info.h"
|
|
#include "video_core/engines/engine_interface.h"
|
|
#include "video_core/engines/engine_upload.h"
|
|
#include "video_core/engines/shader_type.h"
|
|
#include "video_core/gpu.h"
|
|
#include "video_core/macro_interpreter.h"
|
|
#include "video_core/textures/texture.h"
|
|
|
|
namespace Core {
|
|
class System;
|
|
}
|
|
|
|
namespace Tegra {
|
|
class MemoryManager;
|
|
}
|
|
|
|
namespace VideoCore {
|
|
class RasterizerInterface;
|
|
}
|
|
|
|
namespace Tegra::Engines {
|
|
|
|
/**
|
|
* This Engine is known as GF100_3D. Documentation can be found in:
|
|
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
|
|
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
|
|
*/
|
|
|
|
#define MAXWELL3D_REG_INDEX(field_name) \
|
|
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
|
|
|
|
class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
|
|
public:
|
|
explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
|
MemoryManager& memory_manager);
|
|
~Maxwell3D() = default;
|
|
|
|
/// Register structure of the Maxwell3D engine.
|
|
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
|
|
struct Regs {
|
|
static constexpr std::size_t NUM_REGS = 0xE00;
|
|
|
|
static constexpr std::size_t NumRenderTargets = 8;
|
|
static constexpr std::size_t NumViewports = 16;
|
|
static constexpr std::size_t NumCBData = 16;
|
|
static constexpr std::size_t NumVertexArrays = 32;
|
|
static constexpr std::size_t NumVertexAttributes = 32;
|
|
static constexpr std::size_t NumVaryings = 31;
|
|
static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
|
|
static constexpr std::size_t NumClipDistances = 8;
|
|
static constexpr std::size_t NumTransformFeedbackBuffers = 4;
|
|
static constexpr std::size_t MaxShaderProgram = 6;
|
|
static constexpr std::size_t MaxShaderStage = 5;
|
|
// Maximum number of const buffers per shader stage.
|
|
static constexpr std::size_t MaxConstBuffers = 18;
|
|
static constexpr std::size_t MaxConstBufferSize = 0x10000;
|
|
|
|
enum class QueryOperation : u32 {
|
|
Release = 0,
|
|
Acquire = 1,
|
|
Counter = 2,
|
|
Trap = 3,
|
|
};
|
|
|
|
enum class QueryUnit : u32 {
|
|
VFetch = 1,
|
|
VP = 2,
|
|
Rast = 4,
|
|
StrmOut = 5,
|
|
GP = 6,
|
|
ZCull = 7,
|
|
Prop = 10,
|
|
Crop = 15,
|
|
};
|
|
|
|
enum class QuerySelect : u32 {
|
|
Zero = 0,
|
|
TimeElapsed = 2,
|
|
TransformFeedbackPrimitivesGenerated = 11,
|
|
PrimitivesGenerated = 18,
|
|
SamplesPassed = 21,
|
|
TransformFeedbackUnknown = 26,
|
|
};
|
|
|
|
struct QueryCompare {
|
|
u32 initial_sequence;
|
|
u32 initial_mode;
|
|
u32 unknown1;
|
|
u32 unknown2;
|
|
u32 current_sequence;
|
|
u32 current_mode;
|
|
};
|
|
|
|
enum class QuerySyncCondition : u32 {
|
|
NotEqual = 0,
|
|
GreaterThan = 1,
|
|
};
|
|
|
|
enum class ConditionMode : u32 {
|
|
Never = 0,
|
|
Always = 1,
|
|
ResNonZero = 2,
|
|
Equal = 3,
|
|
NotEqual = 4,
|
|
};
|
|
|
|
enum class ShaderProgram : u32 {
|
|
VertexA = 0,
|
|
VertexB = 1,
|
|
TesselationControl = 2,
|
|
TesselationEval = 3,
|
|
Geometry = 4,
|
|
Fragment = 5,
|
|
};
|
|
|
|
struct VertexAttribute {
|
|
enum class Size : u32 {
|
|
Invalid = 0x0,
|
|
Size_32_32_32_32 = 0x01,
|
|
Size_32_32_32 = 0x02,
|
|
Size_16_16_16_16 = 0x03,
|
|
Size_32_32 = 0x04,
|
|
Size_16_16_16 = 0x05,
|
|
Size_8_8_8_8 = 0x0a,
|
|
Size_16_16 = 0x0f,
|
|
Size_32 = 0x12,
|
|
Size_8_8_8 = 0x13,
|
|
Size_8_8 = 0x18,
|
|
Size_16 = 0x1b,
|
|
Size_8 = 0x1d,
|
|
Size_10_10_10_2 = 0x30,
|
|
Size_11_11_10 = 0x31,
|
|
};
|
|
|
|
enum class Type : u32 {
|
|
SignedNorm = 1,
|
|
UnsignedNorm = 2,
|
|
SignedInt = 3,
|
|
UnsignedInt = 4,
|
|
UnsignedScaled = 5,
|
|
SignedScaled = 6,
|
|
Float = 7,
|
|
};
|
|
|
|
union {
|
|
BitField<0, 5, u32> buffer;
|
|
BitField<6, 1, u32> constant;
|
|
BitField<7, 14, u32> offset;
|
|
BitField<21, 6, Size> size;
|
|
BitField<27, 3, Type> type;
|
|
BitField<31, 1, u32> bgra;
|
|
u32 hex;
|
|
};
|
|
|
|
u32 ComponentCount() const {
|
|
switch (size) {
|
|
case Size::Size_32_32_32_32:
|
|
return 4;
|
|
case Size::Size_32_32_32:
|
|
return 3;
|
|
case Size::Size_16_16_16_16:
|
|
return 4;
|
|
case Size::Size_32_32:
|
|
return 2;
|
|
case Size::Size_16_16_16:
|
|
return 3;
|
|
case Size::Size_8_8_8_8:
|
|
return 4;
|
|
case Size::Size_16_16:
|
|
return 2;
|
|
case Size::Size_32:
|
|
return 1;
|
|
case Size::Size_8_8_8:
|
|
return 3;
|
|
case Size::Size_8_8:
|
|
return 2;
|
|
case Size::Size_16:
|
|
return 1;
|
|
case Size::Size_8:
|
|
return 1;
|
|
case Size::Size_10_10_10_2:
|
|
return 4;
|
|
case Size::Size_11_11_10:
|
|
return 3;
|
|
default:
|
|
UNREACHABLE();
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
u32 SizeInBytes() const {
|
|
switch (size) {
|
|
case Size::Size_32_32_32_32:
|
|
return 16;
|
|
case Size::Size_32_32_32:
|
|
return 12;
|
|
case Size::Size_16_16_16_16:
|
|
return 8;
|
|
case Size::Size_32_32:
|
|
return 8;
|
|
case Size::Size_16_16_16:
|
|
return 6;
|
|
case Size::Size_8_8_8_8:
|
|
return 4;
|
|
case Size::Size_16_16:
|
|
return 4;
|
|
case Size::Size_32:
|
|
return 4;
|
|
case Size::Size_8_8_8:
|
|
return 3;
|
|
case Size::Size_8_8:
|
|
return 2;
|
|
case Size::Size_16:
|
|
return 2;
|
|
case Size::Size_8:
|
|
return 1;
|
|
case Size::Size_10_10_10_2:
|
|
return 4;
|
|
case Size::Size_11_11_10:
|
|
return 4;
|
|
default:
|
|
UNREACHABLE();
|
|
}
|
|
}
|
|
|
|
std::string SizeString() const {
|
|
switch (size) {
|
|
case Size::Size_32_32_32_32:
|
|
return "32_32_32_32";
|
|
case Size::Size_32_32_32:
|
|
return "32_32_32";
|
|
case Size::Size_16_16_16_16:
|
|
return "16_16_16_16";
|
|
case Size::Size_32_32:
|
|
return "32_32";
|
|
case Size::Size_16_16_16:
|
|
return "16_16_16";
|
|
case Size::Size_8_8_8_8:
|
|
return "8_8_8_8";
|
|
case Size::Size_16_16:
|
|
return "16_16";
|
|
case Size::Size_32:
|
|
return "32";
|
|
case Size::Size_8_8_8:
|
|
return "8_8_8";
|
|
case Size::Size_8_8:
|
|
return "8_8";
|
|
case Size::Size_16:
|
|
return "16";
|
|
case Size::Size_8:
|
|
return "8";
|
|
case Size::Size_10_10_10_2:
|
|
return "10_10_10_2";
|
|
case Size::Size_11_11_10:
|
|
return "11_11_10";
|
|
default:
|
|
UNREACHABLE();
|
|
return {};
|
|
}
|
|
}
|
|
|
|
std::string TypeString() const {
|
|
switch (type) {
|
|
case Type::SignedNorm:
|
|
return "SNORM";
|
|
case Type::UnsignedNorm:
|
|
return "UNORM";
|
|
case Type::SignedInt:
|
|
return "SINT";
|
|
case Type::UnsignedInt:
|
|
return "UINT";
|
|
case Type::UnsignedScaled:
|
|
return "USCALED";
|
|
case Type::SignedScaled:
|
|
return "SSCALED";
|
|
case Type::Float:
|
|
return "FLOAT";
|
|
}
|
|
UNREACHABLE();
|
|
return {};
|
|
}
|
|
|
|
bool IsNormalized() const {
|
|
return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
|
|
}
|
|
|
|
bool IsConstant() const {
|
|
return constant;
|
|
}
|
|
|
|
bool IsValid() const {
|
|
return size != Size::Invalid;
|
|
}
|
|
|
|
bool operator<(const VertexAttribute& other) const {
|
|
return hex < other.hex;
|
|
}
|
|
};
|
|
|
|
struct MsaaSampleLocation {
|
|
union {
|
|
BitField<0, 4, u32> x0;
|
|
BitField<4, 4, u32> y0;
|
|
BitField<8, 4, u32> x1;
|
|
BitField<12, 4, u32> y1;
|
|
BitField<16, 4, u32> x2;
|
|
BitField<20, 4, u32> y2;
|
|
BitField<24, 4, u32> x3;
|
|
BitField<28, 4, u32> y3;
|
|
};
|
|
|
|
constexpr std::pair<u32, u32> Location(int index) const {
|
|
switch (index) {
|
|
case 0:
|
|
return {x0, y0};
|
|
case 1:
|
|
return {x1, y1};
|
|
case 2:
|
|
return {x2, y2};
|
|
case 3:
|
|
return {x3, y3};
|
|
default:
|
|
UNREACHABLE();
|
|
return {0, 0};
|
|
}
|
|
}
|
|
};
|
|
|
|
enum class DepthMode : u32 {
|
|
MinusOneToOne = 0,
|
|
ZeroToOne = 1,
|
|
};
|
|
|
|
enum class PrimitiveTopology : u32 {
|
|
Points = 0x0,
|
|
Lines = 0x1,
|
|
LineLoop = 0x2,
|
|
LineStrip = 0x3,
|
|
Triangles = 0x4,
|
|
TriangleStrip = 0x5,
|
|
TriangleFan = 0x6,
|
|
Quads = 0x7,
|
|
QuadStrip = 0x8,
|
|
Polygon = 0x9,
|
|
LinesAdjacency = 0xa,
|
|
LineStripAdjacency = 0xb,
|
|
TrianglesAdjacency = 0xc,
|
|
TriangleStripAdjacency = 0xd,
|
|
Patches = 0xe,
|
|
};
|
|
|
|
enum class IndexFormat : u32 {
|
|
UnsignedByte = 0x0,
|
|
UnsignedShort = 0x1,
|
|
UnsignedInt = 0x2,
|
|
};
|
|
|
|
enum class ComparisonOp : u32 {
|
|
// These values are used by Nouveau and most games, they correspond to the OpenGL token
|
|
// values for these operations.
|
|
Never = 0x200,
|
|
Less = 0x201,
|
|
Equal = 0x202,
|
|
LessEqual = 0x203,
|
|
Greater = 0x204,
|
|
NotEqual = 0x205,
|
|
GreaterEqual = 0x206,
|
|
Always = 0x207,
|
|
|
|
// These values are used by some games, they seem to be NV04 values.
|
|
NeverOld = 1,
|
|
LessOld = 2,
|
|
EqualOld = 3,
|
|
LessEqualOld = 4,
|
|
GreaterOld = 5,
|
|
NotEqualOld = 6,
|
|
GreaterEqualOld = 7,
|
|
AlwaysOld = 8,
|
|
};
|
|
|
|
enum class LogicOperation : u32 {
|
|
Clear = 0x1500,
|
|
And = 0x1501,
|
|
AndReverse = 0x1502,
|
|
Copy = 0x1503,
|
|
AndInverted = 0x1504,
|
|
NoOp = 0x1505,
|
|
Xor = 0x1506,
|
|
Or = 0x1507,
|
|
Nor = 0x1508,
|
|
Equiv = 0x1509,
|
|
Invert = 0x150A,
|
|
OrReverse = 0x150B,
|
|
CopyInverted = 0x150C,
|
|
OrInverted = 0x150D,
|
|
Nand = 0x150E,
|
|
Set = 0x150F,
|
|
};
|
|
|
|
enum class StencilOp : u32 {
|
|
Keep = 1,
|
|
Zero = 2,
|
|
Replace = 3,
|
|
Incr = 4,
|
|
Decr = 5,
|
|
Invert = 6,
|
|
IncrWrap = 7,
|
|
DecrWrap = 8,
|
|
KeepOGL = 0x1E00,
|
|
ZeroOGL = 0,
|
|
ReplaceOGL = 0x1E01,
|
|
IncrOGL = 0x1E02,
|
|
DecrOGL = 0x1E03,
|
|
InvertOGL = 0x150A,
|
|
IncrWrapOGL = 0x8507,
|
|
DecrWrapOGL = 0x8508,
|
|
};
|
|
|
|
enum class MemoryLayout : u32 {
|
|
Linear = 0,
|
|
BlockLinear = 1,
|
|
};
|
|
|
|
enum class InvMemoryLayout : u32 {
|
|
BlockLinear = 0,
|
|
Linear = 1,
|
|
};
|
|
|
|
enum class CounterReset : u32 {
|
|
SampleCnt = 0x01,
|
|
Unk02 = 0x02,
|
|
Unk03 = 0x03,
|
|
Unk04 = 0x04,
|
|
EmittedPrimitives = 0x10, // Not tested
|
|
Unk11 = 0x11,
|
|
Unk12 = 0x12,
|
|
Unk13 = 0x13,
|
|
Unk15 = 0x15,
|
|
Unk16 = 0x16,
|
|
Unk17 = 0x17,
|
|
Unk18 = 0x18,
|
|
Unk1A = 0x1A,
|
|
Unk1B = 0x1B,
|
|
Unk1C = 0x1C,
|
|
Unk1D = 0x1D,
|
|
Unk1E = 0x1E,
|
|
GeneratedPrimitives = 0x1F,
|
|
};
|
|
|
|
enum class FrontFace : u32 {
|
|
ClockWise = 0x0900,
|
|
CounterClockWise = 0x0901,
|
|
};
|
|
|
|
enum class CullFace : u32 {
|
|
Front = 0x0404,
|
|
Back = 0x0405,
|
|
FrontAndBack = 0x0408,
|
|
};
|
|
|
|
struct Blend {
|
|
enum class Equation : u32 {
|
|
Add = 1,
|
|
Subtract = 2,
|
|
ReverseSubtract = 3,
|
|
Min = 4,
|
|
Max = 5,
|
|
|
|
// These values are used by Nouveau and some games.
|
|
AddGL = 0x8006,
|
|
SubtractGL = 0x8007,
|
|
ReverseSubtractGL = 0x8008,
|
|
MinGL = 0x800a,
|
|
MaxGL = 0x800b
|
|
};
|
|
|
|
enum class Factor : u32 {
|
|
Zero = 0x1,
|
|
One = 0x2,
|
|
SourceColor = 0x3,
|
|
OneMinusSourceColor = 0x4,
|
|
SourceAlpha = 0x5,
|
|
OneMinusSourceAlpha = 0x6,
|
|
DestAlpha = 0x7,
|
|
OneMinusDestAlpha = 0x8,
|
|
DestColor = 0x9,
|
|
OneMinusDestColor = 0xa,
|
|
SourceAlphaSaturate = 0xb,
|
|
Source1Color = 0x10,
|
|
OneMinusSource1Color = 0x11,
|
|
Source1Alpha = 0x12,
|
|
OneMinusSource1Alpha = 0x13,
|
|
ConstantColor = 0x61,
|
|
OneMinusConstantColor = 0x62,
|
|
ConstantAlpha = 0x63,
|
|
OneMinusConstantAlpha = 0x64,
|
|
|
|
// These values are used by Nouveau and some games.
|
|
ZeroGL = 0x4000,
|
|
OneGL = 0x4001,
|
|
SourceColorGL = 0x4300,
|
|
OneMinusSourceColorGL = 0x4301,
|
|
SourceAlphaGL = 0x4302,
|
|
OneMinusSourceAlphaGL = 0x4303,
|
|
DestAlphaGL = 0x4304,
|
|
OneMinusDestAlphaGL = 0x4305,
|
|
DestColorGL = 0x4306,
|
|
OneMinusDestColorGL = 0x4307,
|
|
SourceAlphaSaturateGL = 0x4308,
|
|
ConstantColorGL = 0xc001,
|
|
OneMinusConstantColorGL = 0xc002,
|
|
ConstantAlphaGL = 0xc003,
|
|
OneMinusConstantAlphaGL = 0xc004,
|
|
Source1ColorGL = 0xc900,
|
|
OneMinusSource1ColorGL = 0xc901,
|
|
Source1AlphaGL = 0xc902,
|
|
OneMinusSource1AlphaGL = 0xc903,
|
|
};
|
|
|
|
u32 separate_alpha;
|
|
Equation equation_rgb;
|
|
Factor factor_source_rgb;
|
|
Factor factor_dest_rgb;
|
|
Equation equation_a;
|
|
Factor factor_source_a;
|
|
Factor factor_dest_a;
|
|
INSERT_UNION_PADDING_WORDS(1);
|
|
};
|
|
|
|
enum class TessellationPrimitive : u32 {
|
|
Isolines = 0,
|
|
Triangles = 1,
|
|
Quads = 2,
|
|
};
|
|
|
|
enum class TessellationSpacing : u32 {
|
|
Equal = 0,
|
|
FractionalOdd = 1,
|
|
FractionalEven = 2,
|
|
};
|
|
|
|
enum class PolygonMode : u32 {
|
|
Point = 0x1b00,
|
|
Line = 0x1b01,
|
|
Fill = 0x1b02,
|
|
};
|
|
|
|
enum class ShadowRamControl : u32 {
|
|
// write value to shadow ram
|
|
Track = 0,
|
|
// write value to shadow ram ( with validation ??? )
|
|
TrackWithFilter = 1,
|
|
// only write to real hw register
|
|
Passthrough = 2,
|
|
// write value from shadow ram to real hw register
|
|
Replay = 3,
|
|
};
|
|
|
|
enum class ViewportSwizzle : u32 {
|
|
PositiveX = 0,
|
|
NegativeX = 1,
|
|
PositiveY = 2,
|
|
NegativeY = 3,
|
|
PositiveZ = 4,
|
|
NegativeZ = 5,
|
|
PositiveW = 6,
|
|
NegativeW = 7,
|
|
};
|
|
|
|
struct RenderTargetConfig {
|
|
u32 address_high;
|
|
u32 address_low;
|
|
u32 width;
|
|
u32 height;
|
|
Tegra::RenderTargetFormat format;
|
|
union {
|
|
BitField<0, 3, u32> block_width;
|
|
BitField<4, 3, u32> block_height;
|
|
BitField<8, 3, u32> block_depth;
|
|
BitField<12, 1, InvMemoryLayout> type;
|
|
} memory_layout;
|
|
union {
|
|
BitField<0, 16, u32> layers;
|
|
BitField<16, 1, u32> volume;
|
|
};
|
|
u32 layer_stride;
|
|
u32 base_layer;
|
|
INSERT_UNION_PADDING_WORDS(7);
|
|
|
|
GPUVAddr Address() const {
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
|
address_low);
|
|
}
|
|
};
|
|
|
|
struct ColorMask {
|
|
union {
|
|
u32 raw;
|
|
BitField<0, 4, u32> R;
|
|
BitField<4, 4, u32> G;
|
|
BitField<8, 4, u32> B;
|
|
BitField<12, 4, u32> A;
|
|
};
|
|
};
|
|
|
|
struct ViewportTransform {
|
|
f32 scale_x;
|
|
f32 scale_y;
|
|
f32 scale_z;
|
|
f32 translate_x;
|
|
f32 translate_y;
|
|
f32 translate_z;
|
|
union {
|
|
u32 raw;
|
|
BitField<0, 3, ViewportSwizzle> x;
|
|
BitField<4, 3, ViewportSwizzle> y;
|
|
BitField<8, 3, ViewportSwizzle> z;
|
|
BitField<12, 3, ViewportSwizzle> w;
|
|
} swizzle;
|
|
INSERT_UNION_PADDING_WORDS(1);
|
|
|
|
Common::Rectangle<f32> GetRect() const {
|
|
return {
|
|
GetX(), // left
|
|
GetY() + GetHeight(), // top
|
|
GetX() + GetWidth(), // right
|
|
GetY() // bottom
|
|
};
|
|
};
|
|
|
|
f32 GetX() const {
|
|
return std::max(0.0f, translate_x - std::fabs(scale_x));
|
|
}
|
|
|
|
f32 GetY() const {
|
|
return std::max(0.0f, translate_y - std::fabs(scale_y));
|
|
}
|
|
|
|
f32 GetWidth() const {
|
|
return translate_x + std::fabs(scale_x) - GetX();
|
|
}
|
|
|
|
f32 GetHeight() const {
|
|
return translate_y + std::fabs(scale_y) - GetY();
|
|
}
|
|
};
|
|
|
|
struct ScissorTest {
|
|
u32 enable;
|
|
union {
|
|
BitField<0, 16, u32> min_x;
|
|
BitField<16, 16, u32> max_x;
|
|
};
|
|
union {
|
|
BitField<0, 16, u32> min_y;
|
|
BitField<16, 16, u32> max_y;
|
|
};
|
|
u32 fill;
|
|
};
|
|
|
|
struct ViewPort {
|
|
union {
|
|
BitField<0, 16, u32> x;
|
|
BitField<16, 16, u32> width;
|
|
};
|
|
union {
|
|
BitField<0, 16, u32> y;
|
|
BitField<16, 16, u32> height;
|
|
};
|
|
float depth_range_near;
|
|
float depth_range_far;
|
|
};
|
|
|
|
struct TransformFeedbackBinding {
|
|
u32 buffer_enable;
|
|
u32 address_high;
|
|
u32 address_low;
|
|
s32 buffer_size;
|
|
s32 buffer_offset;
|
|
INSERT_UNION_PADDING_WORDS(3);
|
|
|
|
GPUVAddr Address() const {
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
|
address_low);
|
|
}
|
|
};
|
|
static_assert(sizeof(TransformFeedbackBinding) == 32);
|
|
|
|
struct TransformFeedbackLayout {
|
|
u32 stream;
|
|
u32 varying_count;
|
|
u32 stride;
|
|
INSERT_UNION_PADDING_WORDS(1);
|
|
};
|
|
static_assert(sizeof(TransformFeedbackLayout) == 16);
|
|
|
|
bool IsShaderConfigEnabled(std::size_t index) const {
|
|
// The VertexB is always enabled.
|
|
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
|
|
return true;
|
|
}
|
|
return shader_config[index].enable != 0;
|
|
}
|
|
|
|
bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
|
|
return IsShaderConfigEnabled(static_cast<std::size_t>(type));
|
|
}
|
|
|
|
union {
|
|
struct {
|
|
INSERT_UNION_PADDING_WORDS(0x44);
|
|
|
|
u32 wait_for_idle;
|
|
|
|
struct {
|
|
u32 upload_address;
|
|
u32 data;
|
|
u32 entry;
|
|
u32 bind;
|
|
} macros;
|
|
|
|
ShadowRamControl shadow_ram_control;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x16);
|
|
|
|
Upload::Registers upload;
|
|
struct {
|
|
union {
|
|
BitField<0, 1, u32> linear;
|
|
};
|
|
} exec_upload;
|
|
|
|
u32 data_upload;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x44);
|
|
|
|
struct {
|
|
union {
|
|
BitField<0, 16, u32> sync_point;
|
|
BitField<16, 1, u32> unknown;
|
|
BitField<20, 1, u32> increment;
|
|
};
|
|
} sync_info;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x15);
|
|
|
|
union {
|
|
BitField<0, 2, TessellationPrimitive> prim;
|
|
BitField<4, 2, TessellationSpacing> spacing;
|
|
BitField<8, 1, u32> cw;
|
|
BitField<9, 1, u32> connected;
|
|
} tess_mode;
|
|
|
|
std::array<f32, 4> tess_level_outer;
|
|
std::array<f32, 2> tess_level_inner;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x10);
|
|
|
|
u32 rasterize_enable;
|
|
|
|
std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0xC0);
|
|
|
|
std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1);
|
|
|
|
u32 tfb_enabled;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x2E);
|
|
|
|
std::array<RenderTargetConfig, NumRenderTargets> rt;
|
|
|
|
std::array<ViewportTransform, NumViewports> viewport_transform;
|
|
|
|
std::array<ViewPort, NumViewports> viewports;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1D);
|
|
|
|
struct {
|
|
u32 first;
|
|
u32 count;
|
|
} vertex_buffer;
|
|
|
|
DepthMode depth_mode;
|
|
|
|
float clear_color[4];
|
|
float clear_depth;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x3);
|
|
|
|
s32 clear_stencil;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x2);
|
|
|
|
PolygonMode polygon_mode_front;
|
|
PolygonMode polygon_mode_back;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x3);
|
|
|
|
u32 polygon_offset_point_enable;
|
|
u32 polygon_offset_line_enable;
|
|
u32 polygon_offset_fill_enable;
|
|
|
|
u32 patch_vertices;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0xC);
|
|
|
|
std::array<ScissorTest, NumViewports> scissor_test;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x15);
|
|
|
|
s32 stencil_back_func_ref;
|
|
u32 stencil_back_mask;
|
|
u32 stencil_back_func_mask;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0xC);
|
|
|
|
u32 color_mask_common;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x2);
|
|
|
|
f32 depth_bounds[2];
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x2);
|
|
|
|
u32 rt_separate_frag_data;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1);
|
|
|
|
u32 multisample_raster_enable;
|
|
u32 multisample_raster_samples;
|
|
std::array<u32, 4> multisample_sample_mask;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x5);
|
|
|
|
struct {
|
|
u32 address_high;
|
|
u32 address_low;
|
|
Tegra::DepthFormat format;
|
|
union {
|
|
BitField<0, 4, u32> block_width;
|
|
BitField<4, 4, u32> block_height;
|
|
BitField<8, 4, u32> block_depth;
|
|
BitField<20, 1, InvMemoryLayout> type;
|
|
} memory_layout;
|
|
u32 layer_stride;
|
|
|
|
GPUVAddr Address() const {
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
|
address_low);
|
|
}
|
|
} zeta;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x41);
|
|
|
|
union {
|
|
BitField<0, 4, u32> stencil;
|
|
BitField<4, 4, u32> unknown;
|
|
BitField<8, 4, u32> scissor;
|
|
BitField<12, 4, u32> viewport;
|
|
} clear_flags;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x10);
|
|
|
|
u32 fill_rectangle;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x8);
|
|
|
|
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
|
|
|
|
std::array<MsaaSampleLocation, 4> multisample_sample_locations;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x2);
|
|
|
|
union {
|
|
BitField<0, 1, u32> enable;
|
|
BitField<4, 3, u32> target;
|
|
} multisample_coverage_to_color;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x8);
|
|
|
|
struct {
|
|
union {
|
|
BitField<0, 4, u32> count;
|
|
BitField<4, 3, u32> map_0;
|
|
BitField<7, 3, u32> map_1;
|
|
BitField<10, 3, u32> map_2;
|
|
BitField<13, 3, u32> map_3;
|
|
BitField<16, 3, u32> map_4;
|
|
BitField<19, 3, u32> map_5;
|
|
BitField<22, 3, u32> map_6;
|
|
BitField<25, 3, u32> map_7;
|
|
};
|
|
|
|
u32 GetMap(std::size_t index) const {
|
|
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
|
|
map_4, map_5, map_6, map_7};
|
|
ASSERT(index < maps.size());
|
|
return maps[index];
|
|
}
|
|
} rt_control;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x2);
|
|
|
|
u32 zeta_width;
|
|
u32 zeta_height;
|
|
union {
|
|
BitField<0, 16, u32> zeta_layers;
|
|
BitField<16, 1, u32> zeta_volume;
|
|
};
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x26);
|
|
|
|
u32 depth_test_enable;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x5);
|
|
|
|
u32 independent_blend_enable;
|
|
|
|
u32 depth_write_enabled;
|
|
|
|
u32 alpha_test_enabled;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x6);
|
|
|
|
u32 d3d_cull_mode;
|
|
|
|
ComparisonOp depth_test_func;
|
|
float alpha_test_ref;
|
|
ComparisonOp alpha_test_func;
|
|
u32 draw_tfb_stride;
|
|
struct {
|
|
float r;
|
|
float g;
|
|
float b;
|
|
float a;
|
|
} blend_color;
|
|
INSERT_UNION_PADDING_WORDS(0x4);
|
|
|
|
struct {
|
|
u32 separate_alpha;
|
|
Blend::Equation equation_rgb;
|
|
Blend::Factor factor_source_rgb;
|
|
Blend::Factor factor_dest_rgb;
|
|
Blend::Equation equation_a;
|
|
Blend::Factor factor_source_a;
|
|
INSERT_UNION_PADDING_WORDS(1);
|
|
Blend::Factor factor_dest_a;
|
|
|
|
u32 enable_common;
|
|
u32 enable[NumRenderTargets];
|
|
} blend;
|
|
|
|
u32 stencil_enable;
|
|
StencilOp stencil_front_op_fail;
|
|
StencilOp stencil_front_op_zfail;
|
|
StencilOp stencil_front_op_zpass;
|
|
ComparisonOp stencil_front_func_func;
|
|
s32 stencil_front_func_ref;
|
|
u32 stencil_front_func_mask;
|
|
u32 stencil_front_mask;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x2);
|
|
|
|
u32 frag_color_clamp;
|
|
|
|
union {
|
|
BitField<0, 1, u32> y_negate;
|
|
BitField<4, 1, u32> triangle_rast_flip;
|
|
} screen_y_control;
|
|
|
|
float line_width_smooth;
|
|
float line_width_aliased;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1F);
|
|
|
|
u32 vb_element_base;
|
|
u32 vb_base_instance;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x35);
|
|
|
|
u32 clip_distance_enabled;
|
|
|
|
u32 samplecnt_enable;
|
|
|
|
float point_size;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1);
|
|
|
|
u32 point_sprite_enable;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x3);
|
|
|
|
CounterReset counter_reset;
|
|
|
|
u32 multisample_enable;
|
|
|
|
u32 zeta_enable;
|
|
|
|
union {
|
|
BitField<0, 1, u32> alpha_to_coverage;
|
|
BitField<4, 1, u32> alpha_to_one;
|
|
} multisample_control;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x4);
|
|
|
|
struct {
|
|
u32 address_high;
|
|
u32 address_low;
|
|
ConditionMode mode;
|
|
|
|
GPUVAddr Address() const {
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
|
address_low);
|
|
}
|
|
} condition;
|
|
|
|
struct {
|
|
u32 tsc_address_high;
|
|
u32 tsc_address_low;
|
|
u32 tsc_limit;
|
|
|
|
GPUVAddr TSCAddress() const {
|
|
return static_cast<GPUVAddr>(
|
|
(static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
|
|
}
|
|
} tsc;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1);
|
|
|
|
float polygon_offset_factor;
|
|
|
|
u32 line_smooth_enable;
|
|
|
|
struct {
|
|
u32 tic_address_high;
|
|
u32 tic_address_low;
|
|
u32 tic_limit;
|
|
|
|
GPUVAddr TICAddress() const {
|
|
return static_cast<GPUVAddr>(
|
|
(static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
|
|
}
|
|
} tic;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x5);
|
|
|
|
u32 stencil_two_side_enable;
|
|
StencilOp stencil_back_op_fail;
|
|
StencilOp stencil_back_op_zfail;
|
|
StencilOp stencil_back_op_zpass;
|
|
ComparisonOp stencil_back_func_func;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x4);
|
|
|
|
u32 framebuffer_srgb;
|
|
|
|
float polygon_offset_units;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x4);
|
|
|
|
Tegra::Texture::MsaaMode multisample_mode;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0xC);
|
|
|
|
union {
|
|
BitField<2, 1, u32> coord_origin;
|
|
BitField<3, 10, u32> enable;
|
|
} point_coord_replace;
|
|
|
|
struct {
|
|
u32 code_address_high;
|
|
u32 code_address_low;
|
|
|
|
GPUVAddr CodeAddress() const {
|
|
return static_cast<GPUVAddr>(
|
|
(static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
|
|
}
|
|
} code_address;
|
|
INSERT_UNION_PADDING_WORDS(1);
|
|
|
|
struct {
|
|
u32 vertex_end_gl;
|
|
union {
|
|
u32 vertex_begin_gl;
|
|
BitField<0, 16, PrimitiveTopology> topology;
|
|
BitField<26, 1, u32> instance_next;
|
|
BitField<27, 1, u32> instance_cont;
|
|
};
|
|
} draw;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0xA);
|
|
|
|
struct {
|
|
u32 enabled;
|
|
u32 index;
|
|
} primitive_restart;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x5F);
|
|
|
|
struct {
|
|
u32 start_addr_high;
|
|
u32 start_addr_low;
|
|
u32 end_addr_high;
|
|
u32 end_addr_low;
|
|
IndexFormat format;
|
|
u32 first;
|
|
u32 count;
|
|
|
|
unsigned FormatSizeInBytes() const {
|
|
switch (format) {
|
|
case IndexFormat::UnsignedByte:
|
|
return 1;
|
|
case IndexFormat::UnsignedShort:
|
|
return 2;
|
|
case IndexFormat::UnsignedInt:
|
|
return 4;
|
|
}
|
|
UNREACHABLE();
|
|
return 1;
|
|
}
|
|
|
|
GPUVAddr StartAddress() const {
|
|
return static_cast<GPUVAddr>(
|
|
(static_cast<GPUVAddr>(start_addr_high) << 32) | start_addr_low);
|
|
}
|
|
|
|
GPUVAddr EndAddress() const {
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
|
|
end_addr_low);
|
|
}
|
|
|
|
/// Adjust the index buffer offset so it points to the first desired index.
|
|
GPUVAddr IndexStart() const {
|
|
return StartAddress() + static_cast<size_t>(first) *
|
|
static_cast<size_t>(FormatSizeInBytes());
|
|
}
|
|
} index_array;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x7);
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1F);
|
|
|
|
float polygon_offset_clamp;
|
|
|
|
struct {
|
|
u32 is_instanced[NumVertexArrays];
|
|
|
|
/// Returns whether the vertex array specified by index is supposed to be
|
|
/// accessed per instance or not.
|
|
bool IsInstancingEnabled(std::size_t index) const {
|
|
return is_instanced[index];
|
|
}
|
|
} instanced_arrays;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x4);
|
|
|
|
union {
|
|
BitField<0, 1, u32> enable;
|
|
BitField<4, 8, u32> unk4;
|
|
} vp_point_size;
|
|
|
|
INSERT_UNION_PADDING_WORDS(1);
|
|
|
|
u32 cull_test_enabled;
|
|
FrontFace front_face;
|
|
CullFace cull_face;
|
|
|
|
u32 pixel_center_integer;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1);
|
|
|
|
u32 viewport_transform_enabled;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x3);
|
|
|
|
union {
|
|
BitField<0, 1, u32> depth_range_0_1;
|
|
BitField<3, 1, u32> depth_clamp_near;
|
|
BitField<4, 1, u32> depth_clamp_far;
|
|
BitField<11, 1, u32> depth_clamp_disabled;
|
|
} view_volume_clip_control;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1F);
|
|
|
|
u32 depth_bounds_enable;
|
|
|
|
INSERT_UNION_PADDING_WORDS(1);
|
|
|
|
struct {
|
|
u32 enable;
|
|
LogicOperation operation;
|
|
} logic_op;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x1);
|
|
|
|
union {
|
|
u32 raw;
|
|
BitField<0, 1, u32> Z;
|
|
BitField<1, 1, u32> S;
|
|
BitField<2, 1, u32> R;
|
|
BitField<3, 1, u32> G;
|
|
BitField<4, 1, u32> B;
|
|
BitField<5, 1, u32> A;
|
|
BitField<6, 4, u32> RT;
|
|
BitField<10, 11, u32> layer;
|
|
} clear_buffers;
|
|
INSERT_UNION_PADDING_WORDS(0xB);
|
|
std::array<ColorMask, NumRenderTargets> color_mask;
|
|
INSERT_UNION_PADDING_WORDS(0x38);
|
|
|
|
struct {
|
|
u32 query_address_high;
|
|
u32 query_address_low;
|
|
u32 query_sequence;
|
|
union {
|
|
u32 raw;
|
|
BitField<0, 2, QueryOperation> operation;
|
|
BitField<4, 1, u32> fence;
|
|
BitField<12, 4, QueryUnit> unit;
|
|
BitField<16, 1, QuerySyncCondition> sync_cond;
|
|
BitField<23, 5, QuerySelect> select;
|
|
BitField<28, 1, u32> short_query;
|
|
} query_get;
|
|
|
|
GPUVAddr QueryAddress() const {
|
|
return static_cast<GPUVAddr>(
|
|
(static_cast<GPUVAddr>(query_address_high) << 32) | query_address_low);
|
|
}
|
|
} query;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x3C);
|
|
|
|
struct {
|
|
union {
|
|
BitField<0, 12, u32> stride;
|
|
BitField<12, 1, u32> enable;
|
|
};
|
|
u32 start_high;
|
|
u32 start_low;
|
|
u32 divisor;
|
|
|
|
GPUVAddr StartAddress() const {
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |
|
|
start_low);
|
|
}
|
|
|
|
bool IsEnabled() const {
|
|
return enable != 0 && StartAddress() != 0;
|
|
}
|
|
|
|
} vertex_array[NumVertexArrays];
|
|
|
|
Blend independent_blend[NumRenderTargets];
|
|
|
|
struct {
|
|
u32 limit_high;
|
|
u32 limit_low;
|
|
|
|
GPUVAddr LimitAddress() const {
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
|
|
limit_low) +
|
|
1;
|
|
}
|
|
} vertex_array_limit[NumVertexArrays];
|
|
|
|
struct {
|
|
union {
|
|
BitField<0, 1, u32> enable;
|
|
BitField<4, 4, ShaderProgram> program;
|
|
};
|
|
u32 offset;
|
|
INSERT_UNION_PADDING_WORDS(14);
|
|
} shader_config[MaxShaderProgram];
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x60);
|
|
|
|
u32 firmware[0x20];
|
|
|
|
struct {
|
|
u32 cb_size;
|
|
u32 cb_address_high;
|
|
u32 cb_address_low;
|
|
u32 cb_pos;
|
|
u32 cb_data[NumCBData];
|
|
|
|
GPUVAddr BufferAddress() const {
|
|
return static_cast<GPUVAddr>(
|
|
(static_cast<GPUVAddr>(cb_address_high) << 32) | cb_address_low);
|
|
}
|
|
} const_buffer;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x10);
|
|
|
|
struct {
|
|
union {
|
|
u32 raw_config;
|
|
BitField<0, 1, u32> valid;
|
|
BitField<4, 5, u32> index;
|
|
};
|
|
INSERT_UNION_PADDING_WORDS(7);
|
|
} cb_bind[MaxShaderStage];
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x56);
|
|
|
|
u32 tex_cb_index;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x7D);
|
|
|
|
std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x298);
|
|
|
|
struct {
|
|
/// Compressed address of a buffer that holds information about bound SSBOs.
|
|
/// This address is usually bound to c0 in the shaders.
|
|
u32 buffer_address;
|
|
|
|
GPUVAddr BufferAddress() const {
|
|
return static_cast<GPUVAddr>(buffer_address) << 8;
|
|
}
|
|
} ssbo_info;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0x11);
|
|
|
|
struct {
|
|
u32 address[MaxShaderStage];
|
|
u32 size[MaxShaderStage];
|
|
} tex_info_buffers;
|
|
|
|
INSERT_UNION_PADDING_WORDS(0xCC);
|
|
};
|
|
std::array<u32, NUM_REGS> reg_array;
|
|
};
|
|
};
|
|
|
|
Regs regs{};
|
|
|
|
/// Store temporary hw register values, used by some calls to restore state after a operation
|
|
Regs shadow_state;
|
|
|
|
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
|
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
|
|
|
struct State {
|
|
struct ShaderStageInfo {
|
|
std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers;
|
|
};
|
|
|
|
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
|
|
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
|
|
};
|
|
|
|
State state{};
|
|
|
|
/// Reads a register value located at the input method address
|
|
u32 GetRegisterValue(u32 method) const;
|
|
|
|
/// Write the value to the register identified by method.
|
|
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
|
|
|
|
/// Write multiple values to the register identified by method.
|
|
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
|
u32 methods_pending) override;
|
|
|
|
/// Write the value to the register identified by method.
|
|
void CallMethodFromMME(u32 method, u32 method_argument);
|
|
|
|
void FlushMMEInlineDraw();
|
|
|
|
/// Given a texture handle, returns the TSC and TIC entries.
|
|
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
|
|
|
|
/// Returns the texture information for a specific texture in a specific shader stage.
|
|
Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
|
|
|
|
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
|
|
|
|
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
|
|
|
|
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
|
u64 offset) const override;
|
|
|
|
SamplerDescriptor AccessSampler(u32 handle) const override;
|
|
|
|
u32 GetBoundBuffer() const override {
|
|
return regs.tex_cb_index;
|
|
}
|
|
|
|
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
|
|
|
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
|
|
|
|
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
|
/// we've seen used.
|
|
using MacroMemory = std::array<u32, 0x40000>;
|
|
|
|
/// Gets a reference to macro memory.
|
|
const MacroMemory& GetMacroMemory() const {
|
|
return macro_memory;
|
|
}
|
|
|
|
bool ShouldExecute() const {
|
|
return execute_on;
|
|
}
|
|
|
|
/// Notify a memory write has happened.
|
|
void OnMemoryWrite() {
|
|
dirty.flags |= dirty.on_write_stores;
|
|
}
|
|
|
|
enum class MMEDrawMode : u32 {
|
|
Undefined,
|
|
Array,
|
|
Indexed,
|
|
};
|
|
|
|
struct MMEDrawState {
|
|
MMEDrawMode current_mode{MMEDrawMode::Undefined};
|
|
u32 current_count{};
|
|
u32 instance_count{};
|
|
bool instance_mode{};
|
|
bool gl_begin_consume{};
|
|
u32 gl_end_count{};
|
|
} mme_draw;
|
|
|
|
struct DirtyState {
|
|
using Flags = std::bitset<std::numeric_limits<u8>::max()>;
|
|
using Table = std::array<u8, Regs::NUM_REGS>;
|
|
using Tables = std::array<Table, 2>;
|
|
|
|
Flags flags;
|
|
Flags on_write_stores;
|
|
Tables tables{};
|
|
} dirty;
|
|
|
|
private:
|
|
void InitializeRegisterDefaults();
|
|
|
|
Core::System& system;
|
|
|
|
VideoCore::RasterizerInterface& rasterizer;
|
|
|
|
MemoryManager& memory_manager;
|
|
|
|
/// Start offsets of each macro in macro_memory
|
|
std::array<u32, 0x80> macro_positions = {};
|
|
|
|
std::array<bool, Regs::NUM_REGS> mme_inline{};
|
|
|
|
/// Memory for macro code
|
|
MacroMemory macro_memory;
|
|
|
|
/// Macro method that is currently being executed / being fed parameters.
|
|
u32 executing_macro = 0;
|
|
/// Parameters that have been submitted to the macro call so far.
|
|
std::vector<u32> macro_params;
|
|
|
|
/// Interpreter for the macro codes uploaded to the GPU.
|
|
MacroInterpreter macro_interpreter;
|
|
|
|
static constexpr u32 null_cb_data = 0xFFFFFFFF;
|
|
struct {
|
|
std::array<std::array<u32, 0x4000>, 16> buffer;
|
|
u32 current{null_cb_data};
|
|
u32 id{null_cb_data};
|
|
u32 start_pos{};
|
|
u32 counter{};
|
|
} cb_data_state;
|
|
|
|
Upload::State upload_state;
|
|
|
|
bool execute_on{true};
|
|
|
|
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
|
|
|
|
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
|
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
|
|
|
/// Retrieves information about a specific TSC entry from the TSC buffer.
|
|
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
|
|
|
|
/**
|
|
* Call a macro on this engine.
|
|
* @param method Method to call
|
|
* @param num_parameters Number of arguments
|
|
* @param parameters Arguments to the method call
|
|
*/
|
|
void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters);
|
|
|
|
/// Handles writes to the macro uploading register.
|
|
void ProcessMacroUpload(u32 data);
|
|
|
|
/// Handles writes to the macro bind register.
|
|
void ProcessMacroBind(u32 data);
|
|
|
|
/// Handles firmware blob 4
|
|
void ProcessFirmwareCall4();
|
|
|
|
/// Handles a write to the CLEAR_BUFFERS register.
|
|
void ProcessClearBuffers();
|
|
|
|
/// Handles a write to the QUERY_GET register.
|
|
void ProcessQueryGet();
|
|
|
|
/// Writes the query result accordingly.
|
|
void StampQueryResult(u64 payload, bool long_query);
|
|
|
|
/// Handles conditional rendering.
|
|
void ProcessQueryCondition();
|
|
|
|
/// Handles counter resets.
|
|
void ProcessCounterReset();
|
|
|
|
/// Handles writes to syncing register.
|
|
void ProcessSyncPoint();
|
|
|
|
/// Handles a write to the CB_DATA[i] register.
|
|
void StartCBData(u32 method);
|
|
void ProcessCBData(u32 value);
|
|
void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
|
|
void FinishCBData();
|
|
|
|
/// Handles a write to the CB_BIND register.
|
|
void ProcessCBBind(std::size_t stage_index);
|
|
|
|
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
|
|
void DrawArrays();
|
|
|
|
// Handles a instance drawcall from MME
|
|
void StepInstance(MMEDrawMode expected_mode, u32 count);
|
|
|
|
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
|
std::optional<u64> GetQueryResult();
|
|
};
|
|
|
|
#define ASSERT_REG_POSITION(field_name, position) \
|
|
static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
|
|
"Field " #field_name " has invalid position")
|
|
|
|
ASSERT_REG_POSITION(wait_for_idle, 0x44);
|
|
ASSERT_REG_POSITION(macros, 0x45);
|
|
ASSERT_REG_POSITION(shadow_ram_control, 0x49);
|
|
ASSERT_REG_POSITION(upload, 0x60);
|
|
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
|
ASSERT_REG_POSITION(data_upload, 0x6D);
|
|
ASSERT_REG_POSITION(sync_info, 0xB2);
|
|
ASSERT_REG_POSITION(tess_mode, 0xC8);
|
|
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
|
|
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
|
|
ASSERT_REG_POSITION(rasterize_enable, 0xDF);
|
|
ASSERT_REG_POSITION(tfb_bindings, 0xE0);
|
|
ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
|
|
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
|
|
ASSERT_REG_POSITION(rt, 0x200);
|
|
ASSERT_REG_POSITION(viewport_transform, 0x280);
|
|
ASSERT_REG_POSITION(viewports, 0x300);
|
|
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
|
|
ASSERT_REG_POSITION(depth_mode, 0x35F);
|
|
ASSERT_REG_POSITION(clear_color[0], 0x360);
|
|
ASSERT_REG_POSITION(clear_depth, 0x364);
|
|
ASSERT_REG_POSITION(clear_stencil, 0x368);
|
|
ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
|
|
ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
|
|
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
|
|
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
|
|
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
|
|
ASSERT_REG_POSITION(patch_vertices, 0x373);
|
|
ASSERT_REG_POSITION(scissor_test, 0x380);
|
|
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
|
|
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
|
|
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
|
|
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
|
|
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
|
|
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
|
|
ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
|
|
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
|
|
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
|
|
ASSERT_REG_POSITION(zeta, 0x3F8);
|
|
ASSERT_REG_POSITION(clear_flags, 0x43E);
|
|
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
|
|
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
|
|
ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
|
|
ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
|
|
ASSERT_REG_POSITION(rt_control, 0x487);
|
|
ASSERT_REG_POSITION(zeta_width, 0x48a);
|
|
ASSERT_REG_POSITION(zeta_height, 0x48b);
|
|
ASSERT_REG_POSITION(zeta_layers, 0x48c);
|
|
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
|
|
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
|
|
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
|
|
ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
|
|
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
|
|
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
|
|
ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
|
|
ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
|
|
ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
|
|
ASSERT_REG_POSITION(blend_color, 0x4C7);
|
|
ASSERT_REG_POSITION(blend, 0x4CF);
|
|
ASSERT_REG_POSITION(stencil_enable, 0x4E0);
|
|
ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
|
|
ASSERT_REG_POSITION(stencil_front_op_zfail, 0x4E2);
|
|
ASSERT_REG_POSITION(stencil_front_op_zpass, 0x4E3);
|
|
ASSERT_REG_POSITION(stencil_front_func_func, 0x4E4);
|
|
ASSERT_REG_POSITION(stencil_front_func_ref, 0x4E5);
|
|
ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
|
|
ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
|
|
ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
|
|
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
|
|
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
|
|
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
|
|
ASSERT_REG_POSITION(vb_element_base, 0x50D);
|
|
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
|
|
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
|
|
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
|
|
ASSERT_REG_POSITION(point_size, 0x546);
|
|
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
|
|
ASSERT_REG_POSITION(counter_reset, 0x54C);
|
|
ASSERT_REG_POSITION(multisample_enable, 0x54D);
|
|
ASSERT_REG_POSITION(zeta_enable, 0x54E);
|
|
ASSERT_REG_POSITION(multisample_control, 0x54F);
|
|
ASSERT_REG_POSITION(condition, 0x554);
|
|
ASSERT_REG_POSITION(tsc, 0x557);
|
|
ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
|
|
ASSERT_REG_POSITION(line_smooth_enable, 0x55C);
|
|
ASSERT_REG_POSITION(tic, 0x55D);
|
|
ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
|
|
ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
|
|
ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
|
|
ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
|
|
ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
|
|
ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
|
|
ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
|
|
ASSERT_REG_POSITION(multisample_mode, 0x574);
|
|
ASSERT_REG_POSITION(point_coord_replace, 0x581);
|
|
ASSERT_REG_POSITION(code_address, 0x582);
|
|
ASSERT_REG_POSITION(draw, 0x585);
|
|
ASSERT_REG_POSITION(primitive_restart, 0x591);
|
|
ASSERT_REG_POSITION(index_array, 0x5F2);
|
|
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
|
|
ASSERT_REG_POSITION(instanced_arrays, 0x620);
|
|
ASSERT_REG_POSITION(vp_point_size, 0x644);
|
|
ASSERT_REG_POSITION(cull_test_enabled, 0x646);
|
|
ASSERT_REG_POSITION(front_face, 0x647);
|
|
ASSERT_REG_POSITION(cull_face, 0x648);
|
|
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
|
|
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
|
|
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
|
|
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
|
|
ASSERT_REG_POSITION(logic_op, 0x671);
|
|
ASSERT_REG_POSITION(clear_buffers, 0x674);
|
|
ASSERT_REG_POSITION(color_mask, 0x680);
|
|
ASSERT_REG_POSITION(query, 0x6C0);
|
|
ASSERT_REG_POSITION(vertex_array[0], 0x700);
|
|
ASSERT_REG_POSITION(independent_blend, 0x780);
|
|
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
|
|
ASSERT_REG_POSITION(shader_config[0], 0x800);
|
|
ASSERT_REG_POSITION(firmware, 0x8C0);
|
|
ASSERT_REG_POSITION(const_buffer, 0x8E0);
|
|
ASSERT_REG_POSITION(cb_bind[0], 0x904);
|
|
ASSERT_REG_POSITION(tex_cb_index, 0x982);
|
|
ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
|
|
ASSERT_REG_POSITION(ssbo_info, 0xD18);
|
|
ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
|
|
ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
|
|
|
|
#undef ASSERT_REG_POSITION
|
|
|
|
} // namespace Tegra::Engines
|