Add support for shader atomic min/max (S32) (#1948)

This commit is contained in:
gdkchan 2021-01-26 03:38:33 -03:00 committed by GitHub
parent c19cfca183
commit e453ba69f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 103 additions and 21 deletions

View file

@ -157,6 +157,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
}
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl");
@ -523,7 +533,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
string code = EmbeddedResources.ReadAllText(filename);
context.AppendLine(code.Replace("\t", CodeGenContext.Tab));
code = code.Replace("\t", CodeGenContext.Tab);
code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName);
code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix);
context.AppendLine(code);
context.AppendLine();
}
}

View file

@ -0,0 +1,21 @@
int Helper_AtomicMaxS32(int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[offset];
newValue = uint(max(int(oldValue), value));
} while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}
int Helper_AtomicMinS32(int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[offset];
newValue = uint(min(int(oldValue), value));
} while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}

View file

@ -0,0 +1,21 @@
int Helper_AtomicMaxS32(int index, int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[offset];
newValue = uint(max(int(oldValue), value));
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}
int Helper_AtomicMinS32(int index, int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[offset];
newValue = uint(min(int(oldValue), value));
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}

View file

@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
static class HelperFunctionNames
{
public static string AtomicMaxS32 = "Helper_AtomicMaxS32";
public static string AtomicMinS32 = "Helper_AtomicMinS32";
public static string MultiplyHighS32 = "Helper_MultiplyHighS32";
public static string MultiplyHighU32 = "Helper_MultiplyHighU32";

View file

@ -1,6 +1,8 @@
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) {
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
float scale = cp_renderScale[samplerIndex];
if (scale == 1.0) {
if (scale == 1.0)
{
return inputVec;
}
return ivec2(vec2(inputVec) * scale);

View file

@ -1,11 +1,16 @@
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) {
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
float scale = fp_renderScale[1 + samplerIndex];
if (scale == 1.0) {
if (scale == 1.0)
{
return inputVec;
}
if (scale < 0.0) { // If less than 0, try interpolate between texels by using the screen position.
if (scale < 0.0) // If less than 0, try interpolate between texels by using the screen position.
{
return ivec2(vec2(inputVec) * (-scale) + mod(gl_FragCoord.xy, -scale));
} else {
}
else
{
return ivec2(vec2(inputVec) * scale);
}
}

View file

@ -42,13 +42,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
for (int argIndex = 0; argIndex < arity; argIndex++)
{
// For shared memory access, the second argument is unused and should be ignored.
// It is there to make both storage and shared access have the same number of arguments.
if (argIndex == 1 && (inst & Instruction.MrMask) == Instruction.MrShared)
{
continue;
}
if (argIndex != 0)
{
args += ", ";
}
VariableType dstType = GetSrcVarType(inst, argIndex);
if (argIndex == 0 && atomic)
{
Instruction memRegion = inst & Instruction.MrMask;
@ -60,12 +65,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
default: throw new InvalidOperationException($"Invalid memory region \"{memRegion}\".");
}
// We use the first 2 operands above.
argIndex++;
}
else
{
VariableType dstType = GetSrcVarType(inst, argIndex);
args += GetSoureExpr(context, operation.GetSource(argIndex), dstType);
}
}

View file

@ -16,9 +16,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd");
Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd");
Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap");
Add(Instruction.AtomicMaxS32, InstType.AtomicBinary, "atomicMax");
Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32);
Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax");
Add(Instruction.AtomicMinS32, InstType.AtomicBinary, "atomicMin");
Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32);
Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin");
Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr");
Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange");

View file

@ -9,6 +9,8 @@
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Shared.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Storage.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />

View file

@ -5,12 +5,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
[Flags]
enum HelperFunctionsMask
{
MultiplyHighS32 = 1 << 0,
MultiplyHighU32 = 1 << 1,
Shuffle = 1 << 2,
ShuffleDown = 1 << 3,
ShuffleUp = 1 << 4,
ShuffleXor = 1 << 5,
SwizzleAdd = 1 << 6
AtomicMinMaxS32Shared = 1 << 0,
AtomicMinMaxS32Storage = 1 << 1,
MultiplyHighS32 = 1 << 2,
MultiplyHighU32 = 1 << 3,
Shuffle = 1 << 4,
ShuffleDown = 1 << 5,
ShuffleUp = 1 << 6,
ShuffleXor = 1 << 7,
SwizzleAdd = 1 << 8
}
}

View file

@ -244,6 +244,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
// decide which helper functions are needed on the final generated code.
switch (operation.Inst)
{
case Instruction.AtomicMaxS32 | Instruction.MrShared:
case Instruction.AtomicMinS32 | Instruction.MrShared:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared;
break;
case Instruction.AtomicMaxS32 | Instruction.MrStorage:
case Instruction.AtomicMinS32 | Instruction.MrStorage:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage;
break;
case Instruction.MultiplyHighS32:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32;
break;