diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs
index 48b37d35d4..f2dd0963fb 100644
--- a/src/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsCubemapView;
public readonly bool SupportsNonConstantTextureOffset;
public readonly bool SupportsShaderBallot;
+ public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsViewportIndexVertexTessellation;
public readonly bool SupportsViewportMask;
@@ -81,6 +82,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsCubemapView,
bool supportsNonConstantTextureOffset,
bool supportsShaderBallot,
+ bool supportsShaderFloat64,
bool supportsTextureShadowLod,
bool supportsViewportIndexVertexTessellation,
bool supportsViewportMask,
@@ -124,6 +126,7 @@ namespace Ryujinx.Graphics.GAL
SupportsCubemapView = supportsCubemapView;
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
SupportsShaderBallot = supportsShaderBallot;
+ SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
SupportsViewportMask = supportsViewportMask;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 4b828080d9..9419ea92c1 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
- private const uint CodeGenVersion = 4992;
+ private const uint CodeGenVersion = 5159;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
index 0001243d40..a60564e0e2 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
+ public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64; // Forwards the SupportsShaderFloat64 flag from the context capabilities.
+
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index 161191b854..234340e5f0 100644
--- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -158,6 +158,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsCubemapView: true,
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
+ supportsShaderFloat64: true,
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
supportsViewportMask: HwCapabilities.SupportsViewportArray2,
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index 473964def2..d4f99e11c8 100644
--- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
+ /// <summary>
+ /// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
+ /// </summary>
+ /// <returns>True if the GPU and driver support double operations, false otherwise (this default implementation always returns true)</returns>
+ bool QueryHostSupportsShaderFloat64()
+ {
+ return true;
+ }
+
/// <summary>
/// Queries host GPU support for signed normalized buffer texture formats.
/// </summary>
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
index d502a9b659..425cfd909d 100644
--- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
@@ -255,5 +255,35 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
_sources = new Operand[] { source };
}
+
+ /// <summary>
+ /// Rewrites this operation so that it works with single precision instead of double precision values.
+ /// An instruction flagged as FP64 gets the FP32 flag instead, conversions between FP32 and FP64
+ /// become a copy, and conversions between FP64 and integers become the matching FP32 conversion.
+ /// Any other instruction is left unchanged.
+ /// </summary>
+ public void TurnDoubleIntoFloat()
+ {
+     bool hasDoubleFlag = (Inst & ~Instruction.Mask) == Instruction.FP64;
+
+     if (hasDoubleFlag)
+     {
+         // Keep the base instruction, only swap the precision flag.
+         Inst = (Inst & Instruction.Mask) | Instruction.FP32;
+
+         return;
+     }
+
+     // Conversion instructions name their precision explicitly, so they are remapped one by one.
+     Instruction? floatEquivalent = Inst switch
+     {
+         Instruction.ConvertFP32ToFP64 or Instruction.ConvertFP64ToFP32 => Instruction.Copy,
+         Instruction.ConvertFP64ToS32 => Instruction.ConvertFP32ToS32,
+         Instruction.ConvertFP64ToU32 => Instruction.ConvertFP32ToU32,
+         Instruction.ConvertS32ToFP64 => Instruction.ConvertS32ToFP32,
+         Instruction.ConvertU32ToFP64 => Instruction.ConvertU32ToFP32,
+         _ => null,
+     };
+
+     if (floatEquivalent.HasValue)
+     {
+         Inst = floatEquivalent.Value;
+     }
+ }
}
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
index 7dd267f3ce..6958b86f2c 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
@@ -45,12 +45,101 @@ namespace Ryujinx.Graphics.Shader.Translation
{
return functionName switch
{
+ HelperFunctionName.ConvertDoubleToFloat => GenerateConvertDoubleToFloatFunction(),
+ HelperFunctionName.ConvertFloatToDouble => GenerateConvertFloatToDoubleFunction(),
HelperFunctionName.TexelFetchScale => GenerateTexelFetchScaleFunction(),
HelperFunctionName.TextureSizeUnscale => GenerateTextureSizeUnscaleFunction(),
_ => throw new ArgumentException($"Invalid function name {functionName}")
};
}
+ /// <summary>
+ /// Generates a helper function that converts a double precision value, passed as two 32-bit words,
+ /// into the bit pattern of a single precision value, rounding to nearest even.
+ /// </summary>
+ /// <remarks>
+ /// Argument 0 is the low word and argument 1 is the high word of the double.
+ /// Zero, infinity and NaN are handled explicitly. Finite values whose exponent does not fit
+ /// on a single precision value (overflow or underflow) are not handled and might produce incorrect results.
+ /// </remarks>
+ /// <returns>The generated function</returns>
+ private Function GenerateConvertDoubleToFloatFunction()
+ {
+     EmitterContext context = new EmitterContext();
+
+     Operand valueLow = Argument(0);
+     Operand valueHigh = Argument(1);
+
+     // Reduce the 52-bit fraction to 30 bits. The 22 bits that are dropped are folded into
+     // bit 0 (sticky bit), so that rounding still knows if anything non-zero was discarded.
+     Operand mantissaLow = context.BitwiseAnd(valueLow, Const((1 << 22) - 1));
+     Operand mantissa = context.ShiftRightU32(valueLow, Const(22));
+
+     mantissa = context.BitwiseOr(mantissa, context.ShiftLeft(context.BitwiseAnd(valueHigh, Const(0xfffff)), Const(10)));
+     mantissa = context.BitwiseOr(mantissa, context.ConditionalSelect(mantissaLow, Const(1), Const(0)));
+
+     Operand exp = context.BitwiseAnd(context.ShiftRightU32(valueHigh, Const(20)), Const(0x7ff));
+     Operand sign = context.ShiftRightS32(valueHigh, Const(31));
+
+     Operand resultSign = context.ShiftLeft(sign, Const(31));
+
+     // Zero exponent and zero fraction means the input is +0 or -0, only the sign is returned.
+     Operand notZero = context.BitwiseOr(mantissa, exp);
+
+     Operand lblNotZero = Label();
+
+     context.BranchIfTrue(lblNotZero, notZero);
+
+     context.Return(resultSign);
+
+     context.MarkLabel(lblNotZero);
+
+     Operand notNaNOrInf = context.ICompareNotEqual(exp, Const(0x7ff));
+
+     // Infinity and NaN are built directly rather than going through the rounding path below.
+     // That path adds the implicit bit (bit 23 after rounding) on top of the exponent field, which
+     // would carry an exponent of 0xff into the sign bit and turn infinity into a zero.
+     // The top fraction bits are kept as NaN payload, and the quiet bit is forced for any NaN so
+     // that a payload living only on the discarded bits does not degenerate into infinity.
+     Operand nanPayload = context.BitwiseOr(
+         context.ShiftRightU32(mantissa, Const(7)),
+         context.ConditionalSelect(mantissa, Const(0x400000), Const(0)));
+     Operand nanOrInfResult = context.BitwiseOr(context.BitwiseOr(resultSign, Const(0x7f800000)), nanPayload);
+
+     // Insert the implicit leading one at bit 30. The exponent is rebiased with 0x381 rather than 0x380
+     // because the implicit bit ends up on bit 23 and is added to the exponent field when packing.
+     mantissa = context.BitwiseOr(mantissa, Const(0x40000000));
+     exp = context.ISubtract(exp, Const(0x381));
+
+     // Note: Overflow and underflow cases are not handled here and might produce incorrect results.
+
+     // Round to nearest even: add half of the discarded range, drop the 7 extra bits,
+     // and clear the lowest bit when the discarded bits were exactly half (tie).
+     Operand roundBits = context.BitwiseAnd(mantissa, Const(0x7f));
+     Operand roundBitsXor64 = context.BitwiseExclusiveOr(roundBits, Const(0x40));
+     mantissa = context.ShiftRightU32(context.IAdd(mantissa, Const(0x40)), Const(7));
+     mantissa = context.BitwiseAnd(mantissa, context.ConditionalSelect(roundBitsXor64, Const(~0), Const(~1)));
+
+     exp = context.ConditionalSelect(mantissa, exp, Const(0));
+
+     Operand finiteResult = context.IAdd(context.IAdd(mantissa, context.ShiftLeft(exp, Const(23))), resultSign);
+
+     context.Return(context.ConditionalSelect(notNaNOrInf, finiteResult, nanOrInfResult));
+
+     return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertDoubleToFloat", true, 2, 0);
+ }
+
+ /// <summary>
+ /// Generates a helper function that converts the bit pattern of a single precision value
+ /// into a double precision value, returned as two 32-bit words.
+ /// </summary>
+ /// <remarks>
+ /// Argument 0 is the single precision input. Argument 1 receives the low word and
+ /// argument 2 receives the high word of the resulting double.
+ /// </remarks>
+ /// <returns>The generated function</returns>
+ private Function GenerateConvertFloatToDoubleFunction()
+ {
+     EmitterContext context = new EmitterContext();
+
+     Operand value = Argument(0);
+
+     Operand mantissa = context.BitwiseAnd(value, Const(0x7fffff));
+     Operand exp = context.BitwiseAnd(context.ShiftRightU32(value, Const(23)), Const(0xff));
+     Operand sign = context.ShiftRightS32(value, Const(31));
+
+     Operand notNaNOrInf = context.ICompareNotEqual(exp, Const(0xff));
+     Operand expNotZero = context.ICompareNotEqual(exp, Const(0));
+
+     // Inputs with a zero exponent are either zero or denormal.
+     // A denormal is normalized by shifting its highest set bit up to bit 23 (the implicit bit position).
+     // That bit lands on bit 20 of the high word when packing and adds one to the exponent field,
+     // which is why the normalized exponent is 0x380 - shiftDist rather than 0x381 - shiftDist.
+     // Assumes FindMSBU32 gives the index of the highest set bit, or -1 for zero (GLSL findMSB semantics),
+     // which keeps the shift distance below 32 even for a zero mantissa.
+     Operand shiftDist = context.ISubtract(Const(23), context.FindMSBU32(mantissa));
+     Operand normExp = context.ISubtract(Const(0x380), shiftDist);
+     Operand normMant = context.ShiftLeft(mantissa, shiftDist);
+
+     // A zero input must keep a zero exponent, otherwise the result would be a tiny non-zero value.
+     Operand zeroOrDenormExp = context.ConditionalSelect(mantissa, normExp, Const(0));
+
+     // Rebias the exponent from single (127) to double (1023) precision.
+     exp = context.IAdd(exp, Const(0x380));
+
+     exp = context.ConditionalSelect(notNaNOrInf, exp, Const(0x7ff));
+     exp = context.ConditionalSelect(expNotZero, exp, zeroOrDenormExp);
+     mantissa = context.ConditionalSelect(expNotZero, mantissa, normMant);
+
+     // The 23-bit fraction is placed at the top of the 52-bit fraction:
+     // its low 3 bits go to the top of the low word, the rest goes to the high word.
+     Operand resultLow = context.ShiftLeft(mantissa, Const(29));
+     Operand resultHigh = context.ShiftRightU32(mantissa, Const(3));
+
+     resultHigh = context.IAdd(resultHigh, context.ShiftLeft(exp, Const(20)));
+     resultHigh = context.IAdd(resultHigh, context.ShiftLeft(sign, Const(31)));
+
+     context.Copy(Argument(1), resultLow);
+     context.Copy(Argument(2), resultHigh);
+     context.Return();
+
+     return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertFloatToDouble", false, 1, 2);
+ }
+
private Function GenerateTexelFetchScaleFunction()
{
EmitterContext context = new EmitterContext();
diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs
index 5accdf65fb..8c37c34c7b 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs
@@ -1,10 +1,9 @@
-using Ryujinx.Graphics.Shader.IntermediateRepresentation;
-using System.Collections.Generic;
-
namespace Ryujinx.Graphics.Shader.Translation
{
enum HelperFunctionName // Names of the helper functions that HelperFunctionManager can generate.
{
+ ConvertDoubleToFloat, // Double given as two 32-bit words (low, high) to single precision bits.
+ ConvertFloatToDouble, // Single precision bits to a double written as two 32-bit words (low, high).
TexelFetchScale,
TextureSizeUnscale
}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/DoubleToFloat.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/DoubleToFloat.cs
new file mode 100644
index 0000000000..42bce5cc27
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/DoubleToFloat.cs
@@ -0,0 +1,70 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ /// <summary>
+ /// Pass that replaces double precision operations with single precision ones.
+ /// Packing and unpacking of doubles from 32-bit words is replaced with calls to helper functions
+ /// that convert between the double and float bit patterns.
+ /// </summary>
+ static class DoubleToFloat
+ {
+     /// <summary>
+     /// Replaces all double precision operations of a basic block with single precision equivalents.
+     /// </summary>
+     /// <param name="hfm">Helper function manager used to get the conversion helper functions</param>
+     /// <param name="block">Basic block to be modified</param>
+     public static void RunPass(HelperFunctionManager hfm, BasicBlock block)
+     {
+         for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+         {
+             if (node.Value is Operation operation)
+             {
+                 // The returned node replaces the current one, so iteration continues after it.
+                 node = InsertSoftFloat64(hfm, node, operation);
+             }
+         }
+     }
+
+     /// <summary>
+     /// Replaces a single double precision operation.
+     /// </summary>
+     /// <param name="hfm">Helper function manager used to get the conversion helper functions</param>
+     /// <param name="node">List node that holds the operation</param>
+     /// <param name="operation">Operation stored on <paramref name="node"/></param>
+     /// <returns>The node holding the replacement, or <paramref name="node"/> if it was modified in place</returns>
+     private static LinkedListNode<INode> InsertSoftFloat64(HelperFunctionManager hfm, LinkedListNode<INode> node, Operation operation)
+     {
+         switch (operation.Inst)
+         {
+             case Instruction.PackDouble2x32:
+             {
+                 int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.ConvertDoubleToFloat);
+
+                 Operand[] callArgs = new Operand[] { Const(functionId), operation.GetSource(0), operation.GetSource(1) };
+
+                 // The destination is detached from the old operation before the call takes ownership of it.
+                 Operand floatValue = operation.Dest;
+
+                 operation.Dest = null;
+
+                 LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, floatValue, callArgs));
+
+                 Utils.DeleteNode(node, operation);
+
+                 return newNode;
+             }
+             case Instruction.UnpackDouble2x32:
+             {
+                 int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.ConvertFloatToDouble);
+
+                 // TODO: Allow UnpackDouble2x32 to produce two outputs and get rid of "operation.Index".
+
+                 // Only the word selected by the index is used, the other one goes to a throwaway local.
+                 Operand resultLow = operation.Index == 0 ? operation.Dest : Local();
+                 Operand resultHigh = operation.Index == 1 ? operation.Dest : Local();
+
+                 operation.Dest = null;
+
+                 Operand[] callArgs = new Operand[] { Const(functionId), operation.GetSource(0), resultLow, resultHigh };
+
+                 LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs));
+
+                 Utils.DeleteNode(node, operation);
+
+                 return newNode;
+             }
+             default:
+                 operation.TurnDoubleIntoFloat();
+
+                 return node;
+         }
+     }
+ }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
index bdb3a62ece..8d2669c0de 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -11,8 +11,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
RunOptimizationPasses(blocks, config);
+ // TODO: Some of these passes are not optimizations and shouldn't be here.
+
GlobalToStorage.RunPass(hfm, blocks, config);
+ bool hostSupportsShaderFloat64 = config.GpuAccessor.QueryHostSupportsShaderFloat64();
+
// Those passes are looking for specific patterns and only needs to run once.
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
@@ -24,6 +28,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
EliminateMultiplyByFragmentCoordW(blocks[blkIndex]);
}
+
+ // If the host does not support double operations, we need to turn them into float operations.
+ if (!hostSupportsShaderFloat64)
+ {
+ DoubleToFloat.RunPass(hfm, blocks[blkIndex]);
+ }
}
// Run optimizations one last time to remove any code that is now optimizable after above passes.
diff --git a/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs b/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
index f600d93f00..393bcf1a20 100644
--- a/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
+++ b/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
@@ -26,6 +26,7 @@ namespace Ryujinx.Graphics.Vulkan
public readonly bool SupportsFragmentShaderInterlock;
public readonly bool SupportsGeometryShaderPassthrough;
public readonly bool SupportsSubgroupSizeControl;
+ public readonly bool SupportsShaderFloat64;
public readonly bool SupportsShaderInt8;
public readonly bool SupportsShaderStencilExport;
public readonly bool SupportsShaderStorageImageMultisample;
@@ -63,6 +64,7 @@ namespace Ryujinx.Graphics.Vulkan
bool supportsFragmentShaderInterlock,
bool supportsGeometryShaderPassthrough,
bool supportsSubgroupSizeControl,
+ bool supportsShaderFloat64,
bool supportsShaderInt8,
bool supportsShaderStencilExport,
bool supportsShaderStorageImageMultisample,
@@ -99,6 +101,7 @@ namespace Ryujinx.Graphics.Vulkan
SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
SupportsSubgroupSizeControl = supportsSubgroupSizeControl;
+ SupportsShaderFloat64 = supportsShaderFloat64;
SupportsShaderInt8 = supportsShaderInt8;
SupportsShaderStencilExport = supportsShaderStencilExport;
SupportsShaderStorageImageMultisample = supportsShaderStorageImageMultisample;
diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index 3987be9b47..0daec00c33 100644
--- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -306,6 +306,7 @@ namespace Ryujinx.Graphics.Vulkan
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"),
_physicalDevice.IsDeviceExtensionPresent("VK_NV_geometry_shader_passthrough"),
supportsSubgroupSizeControl,
+ features2.Features.ShaderFloat64,
featuresShaderInt8.ShaderInt8,
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_shader_stencil_export"),
features2.Features.ShaderStorageImageMultisample,
@@ -594,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsCubemapView: !IsAmdGcn,
supportsNonConstantTextureOffset: false,
supportsShaderBallot: false,
+ supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureShadowLod: false,
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,
supportsViewportMask: Capabilities.SupportsViewportArray2,