Merge branch 'master' into patch-3
This commit is contained in:
147 changed files with 10210 additions and 3287 deletions
@ -158,3 +158,6 @@ $RECYCLE.BIN/
# Mac desktop service store files
# VS Launch Settings
@ -4,6 +4,7 @@ using ChocolArm64.Instruction;
using ChocolArm64.Instruction32;
using ChocolArm64.State;
using System;
using System.Collections.Generic;
namespace ChocolArm64
@ -44,7 +45,7 @@ namespace ChocolArm64
SetA64("11101010xx0xxxxxxxxxxxxxxxxxxxxx", AInstEmit.Ands, typeof(AOpCodeAluRs));
SetA64("x0011010110xxxxx001010xxxxxxxxxx", AInstEmit.Asrv, typeof(AOpCodeAluRs));
SetA64("000101xxxxxxxxxxxxxxxxxxxxxxxxxx", AInstEmit.B, typeof(AOpCodeBImmAl));
SetA64("01010100xxxxxxxxxxxxxxxxxxxxxxxx", AInstEmit.B_Cond, typeof(AOpCodeBImmCond));
SetA64("01010100xxxxxxxxxxxxxxxxxxx0xxxx", AInstEmit.B_Cond, typeof(AOpCodeBImmCond));
SetA64("00110011000xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Bfm, typeof(AOpCodeBfm));
SetA64("1011001101xxxxxxxxxxxxxxxxxxxxxx", AInstEmit.Bfm, typeof(AOpCodeBfm));
SetA64("00001010xx1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Bic, typeof(AOpCodeAluRs));
@ -52,8 +53,8 @@ namespace ChocolArm64
SetA64("01101010xx1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Bics, typeof(AOpCodeAluRs));
SetA64("11101010xx1xxxxxxxxxxxxxxxxxxxxx", AInstEmit.Bics, typeof(AOpCodeAluRs));
SetA64("100101xxxxxxxxxxxxxxxxxxxxxxxxxx", AInstEmit.Bl, typeof(AOpCodeBImmAl));
SetA64("11010110001xxxxx000000xxxxxxxxxx", AInstEmit.Blr, typeof(AOpCodeBReg));
SetA64("11010110000xxxxx000000xxxxxxxxxx", AInstEmit.Br, typeof(AOpCodeBReg));
SetA64("1101011000111111000000xxxxx00000", AInstEmit.Blr, typeof(AOpCodeBReg));
SetA64("1101011000011111000000xxxxx00000", AInstEmit.Br, typeof(AOpCodeBReg));
SetA64("11010100001xxxxxxxxxxxxxxxx00000", AInstEmit.Brk, typeof(AOpCodeException));
SetA64("x0110101xxxxxxxxxxxxxxxxxxxxxxxx", AInstEmit.Cbnz, typeof(AOpCodeBImmCmp));
SetA64("x0110100xxxxxxxxxxxxxxxxxxxxxxxx", AInstEmit.Cbz, typeof(AOpCodeBImmCmp));
@ -126,7 +127,7 @@ namespace ChocolArm64
SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", AInstEmit.Pfrm, typeof(AOpCodeMemImm));
SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", AInstEmit.Pfrm, typeof(AOpCodeMemLit));
SetA64("x101101011000000000000xxxxxxxxxx", AInstEmit.Rbit, typeof(AOpCodeAlu));
SetA64("11010110010xxxxx000000xxxxxxxxxx", AInstEmit.Ret, typeof(AOpCodeBReg));
SetA64("1101011001011111000000xxxxx00000", AInstEmit.Ret, typeof(AOpCodeBReg));
SetA64("x101101011000000000001xxxxxxxxxx", AInstEmit.Rev16, typeof(AOpCodeAlu));
SetA64("x101101011000000000010xxxxxxxxxx", AInstEmit.Rev32, typeof(AOpCodeAlu));
SetA64("1101101011000000000011xxxxxxxxxx", AInstEmit.Rev64, typeof(AOpCodeAlu));
@ -266,11 +267,13 @@ namespace ChocolArm64
SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", AInstEmit.Fdiv_V, typeof(AOpCodeSimdReg));
SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fmadd_S, typeof(AOpCodeSimdReg));
SetA64("000111100x1xxxxx010010xxxxxxxxxx", AInstEmit.Fmax_S, typeof(AOpCodeSimdReg));
SetA64("0x0011100x1xxxxx111101xxxxxxxxxx", AInstEmit.Fmax_V, typeof(AOpCodeSimdReg));
SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", AInstEmit.Fmax_V, typeof(AOpCodeSimdReg));
SetA64("000111100x1xxxxx011010xxxxxxxxxx", AInstEmit.Fmaxnm_S, typeof(AOpCodeSimdReg));
SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", AInstEmit.Fmaxnm_V, typeof(AOpCodeSimdReg));
SetA64("000111100x1xxxxx010110xxxxxxxxxx", AInstEmit.Fmin_S, typeof(AOpCodeSimdReg));
SetA64("0x0011101x1xxxxx111101xxxxxxxxxx", AInstEmit.Fmin_V, typeof(AOpCodeSimdReg));
SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fmin_V, typeof(AOpCodeSimdReg));
SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg));
SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg));
SetA64("010111111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg));
SetA64("0x0011111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Ve, typeof(AOpCodeSimdRegElemF));
@ -371,16 +374,31 @@ namespace ChocolArm64
SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", AInstEmit.Smin_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg));
SetA64("01011110xx100000011110xxxxxxxxxx", AInstEmit.Sqabs_S, typeof(AOpCodeSimd));
SetA64("0>001110<<100000011110xxxxxxxxxx", AInstEmit.Sqabs_V, typeof(AOpCodeSimd));
SetA64("01011110xx1xxxxx000011xxxxxxxxxx", AInstEmit.Sqadd_S, typeof(AOpCodeSimdReg));
SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", AInstEmit.Sqadd_V, typeof(AOpCodeSimdReg));
SetA64("01111110xx100000011110xxxxxxxxxx", AInstEmit.Sqneg_S, typeof(AOpCodeSimd));
SetA64("0>101110<<100000011110xxxxxxxxxx", AInstEmit.Sqneg_V, typeof(AOpCodeSimd));
SetA64("0x00111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_V, typeof(AOpCodeSimdShImm));
SetA64("01011110xx1xxxxx001011xxxxxxxxxx", AInstEmit.Sqsub_S, typeof(AOpCodeSimdReg));
SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", AInstEmit.Sqsub_V, typeof(AOpCodeSimdReg));
SetA64("01011110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_S, typeof(AOpCodeSimd));
SetA64("0x001110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_V, typeof(AOpCodeSimd));
SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S, typeof(AOpCodeSimd));
SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V, typeof(AOpCodeSimd));
SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm));
SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm));
SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V, typeof(AOpCodeSimdReg));
SetA64("0x00111100>>>xxx101001xxxxxxxxxx", AInstEmit.Sshll_V, typeof(AOpCodeSimdShImm));
SetA64("010111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm));
SetA64("0x0011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm));
SetA64("0x0011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm));
SetA64("0101111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm));
SetA64("0x00111100>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm));
SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm));
SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm));
SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm));
SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", AInstEmit.Ssubw_V, typeof(AOpCodeSimdReg));
SetA64("0x00110000000000xxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs));
SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs));
SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", AInstEmit.St__Vss, typeof(AOpCodeSimdMemSs));
@ -394,6 +412,8 @@ namespace ChocolArm64
SetA64("01111110111xxxxx100001xxxxxxxxxx", AInstEmit.Sub_S, typeof(AOpCodeSimdReg));
SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", AInstEmit.Sub_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", AInstEmit.Subhn_V, typeof(AOpCodeSimdReg));
SetA64("01011110xx100000001110xxxxxxxxxx", AInstEmit.Suqadd_S, typeof(AOpCodeSimd));
SetA64("0>001110<<100000001110xxxxxxxxxx", AInstEmit.Suqadd_V, typeof(AOpCodeSimd));
SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", AInstEmit.Tbl_V, typeof(AOpCodeSimdTbl));
SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", AInstEmit.Trn1_V, typeof(AOpCodeSimdReg));
SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", AInstEmit.Trn2_V, typeof(AOpCodeSimdReg));
@ -415,31 +435,65 @@ namespace ChocolArm64
SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Uminp_V, typeof(AOpCodeSimdReg));
SetA64("0x001110000xxxxx001111xxxxxxxxxx", AInstEmit.Umov_S, typeof(AOpCodeSimdIns));
SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Umull_V, typeof(AOpCodeSimdReg));
SetA64("01111110xx1xxxxx000011xxxxxxxxxx", AInstEmit.Uqadd_S, typeof(AOpCodeSimdReg));
SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", AInstEmit.Uqadd_V, typeof(AOpCodeSimdReg));
SetA64("01111110xx1xxxxx001011xxxxxxxxxx", AInstEmit.Uqsub_S, typeof(AOpCodeSimdReg));
SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", AInstEmit.Uqsub_V, typeof(AOpCodeSimdReg));
SetA64("01111110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_S, typeof(AOpCodeSimd));
SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V, typeof(AOpCodeSimd));
SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V, typeof(AOpCodeSimdReg));
SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V, typeof(AOpCodeSimdShImm));
SetA64("011111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm));
SetA64("0x1011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm));
SetA64("0x1011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm));
SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm));
SetA64("0x10111100>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm));
SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm));
SetA64("01111110xx100000001110xxxxxxxxxx", AInstEmit.Usqadd_S, typeof(AOpCodeSimd));
SetA64("0>101110<<100000001110xxxxxxxxxx", AInstEmit.Usqadd_V, typeof(AOpCodeSimd));
SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm));
SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm));
SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", AInstEmit.Usubw_V, typeof(AOpCodeSimdReg));
SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V, typeof(AOpCodeSimdReg));
SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V, typeof(AOpCodeSimd));
SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", AInstEmit.Zip1_V, typeof(AOpCodeSimdReg));
SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", AInstEmit.Zip2_V, typeof(AOpCodeSimdReg));
#region "Generate InstA64FastLookup Table (AArch64)"
var Tmp = new List<InstInfo>[FastLookupSize];
for (int i = 0; i < FastLookupSize; i++)
Tmp[i] = new List<InstInfo>();
private class TreeNode
foreach (var Inst in AllInstA64)
int Mask = ToFastLookupIndex(Inst.Mask);
int Value = ToFastLookupIndex(Inst.Value);
for (int i = 0; i < FastLookupSize; i++)
if ((i & Mask) == Value)
for (int i = 0; i < FastLookupSize; i++)
InstA64FastLookup[i] = Tmp[i].ToArray();
private class InstInfo
public int Mask;
public int Value;
public TreeNode Next;
public AInst Inst;
public TreeNode(int Mask, int Value, AInst Inst)
public InstInfo(int Mask, int Value, AInst Inst)
this.Mask = Mask;
this.Value = Value;
@ -447,8 +501,11 @@ namespace ChocolArm64
private static TreeNode InstHeadA32;
private static TreeNode InstHeadA64;
private static List<InstInfo> AllInstA32 = new List<InstInfo>();
private static List<InstInfo> AllInstA64 = new List<InstInfo>();
private static int FastLookupSize = 0x1000;
private static InstInfo[][] InstA64FastLookup = new InstInfo[FastLookupSize][];
private static void SetA32(string Encoding, AInstInterpreter Interpreter, Type Type)
@ -509,7 +566,7 @@ namespace ChocolArm64
if (XBits == 0)
InsertTop(XMask, Value, Inst, Mode);
InsertInst(XMask, Value, Inst, Mode);
@ -525,55 +582,53 @@ namespace ChocolArm64
if (Mask != Blacklisted)
InsertTop(XMask, Value | Mask, Inst, Mode);
InsertInst(XMask, Value | Mask, Inst, Mode);
private static void InsertTop(
private static void InsertInst(
int XMask,
int Value,
AInst Inst,
AExecutionMode Mode)
TreeNode Node = new TreeNode(XMask, Value, Inst);
InstInfo Info = new InstInfo(XMask, Value, Inst);
if (Mode == AExecutionMode.AArch64)
Node.Next = InstHeadA64;
InstHeadA64 = Node;
Node.Next = InstHeadA32;
InstHeadA32 = Node;
public static AInst GetInstA32(int OpCode)
return GetInst(InstHeadA32, OpCode);
return GetInstFromList(AllInstA32, OpCode);
public static AInst GetInstA64(int OpCode)
return GetInst(InstHeadA64, OpCode);
return GetInstFromList(InstA64FastLookup[ToFastLookupIndex(OpCode)], OpCode);
private static AInst GetInst(TreeNode Head, int OpCode)
private static int ToFastLookupIndex(int Value)
TreeNode Node = Head;
return ((Value >> 10) & 0x00F) | ((Value >> 18) & 0xFF0);
private static AInst GetInstFromList(IEnumerable<InstInfo> InstList, int OpCode)
foreach (var Node in InstList)
if ((OpCode & Node.Mask) == Node.Value)
return Node.Inst;
while ((Node = Node.Next) != null);
return AInst.Undefined;
@ -48,18 +48,24 @@ namespace ChocolArm64.Instruction
AOpCodeMemEx Op = (AOpCodeMemEx)Context.CurrOp;
if (AccType.HasFlag(AccessType.Ordered))
bool Ordered = (AccType & AccessType.Ordered) != 0;
bool Exclusive = (AccType & AccessType.Exclusive) != 0;
if (Ordered)
if (AccType.HasFlag(AccessType.Exclusive))
if (Exclusive)
EmitMemoryCall(Context, nameof(AMemory.SetExclusive), Op.Rn);
EmitReadZxCall(Context, Op.Size);
@ -68,7 +74,7 @@ namespace ChocolArm64.Instruction
if (Pair)
Context.EmitLdc_I(8 << Op.Size);
@ -104,7 +110,10 @@ namespace ChocolArm64.Instruction
AOpCodeMemEx Op = (AOpCodeMemEx)Context.CurrOp;
if (AccType.HasFlag(AccessType.Ordered))
bool Ordered = (AccType & AccessType.Ordered) != 0;
bool Exclusive = (AccType & AccessType.Exclusive) != 0;
if (Ordered)
@ -112,7 +121,7 @@ namespace ChocolArm64.Instruction
AILLabel LblEx = new AILLabel();
AILLabel LblEnd = new AILLabel();
if (AccType.HasFlag(AccessType.Exclusive))
if (Exclusive)
EmitMemoryCall(Context, nameof(AMemory.TestExclusive), Op.Rn);
@ -145,7 +154,7 @@ namespace ChocolArm64.Instruction
EmitWriteCall(Context, Op.Size);
if (AccType.HasFlag(AccessType.Exclusive))
if (Exclusive)
@ -65,11 +65,12 @@ namespace ChocolArm64.Instruction
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
for (int Index = 1; Index < (Bytes >> Op.Size); Index++)
for (int Index = 1; Index < Elems; Index++)
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
@ -97,13 +98,16 @@ namespace ChocolArm64.Instruction
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
int ESize = 8 << Op.Size;
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
Context.EmitLdc_I4(8 << Op.Size);
@ -159,12 +163,19 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Elems = 8 >> Op.Size;
int ESize = 8 << Op.Size;
int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
long RoundConst = 1L << (ESize - 1);
if (Part != 0)
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
@ -181,93 +192,18 @@ namespace ChocolArm64.Instruction
EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
EmitVectorInsertTmp(Context, Part + Index, Op.Size);
if (Part == 0)
EmitVectorZeroUpper(Context, Op.Rd);
private static void EmitSaturatingExtNarrow(AILEmitterCtx Context, bool SignedSrc, bool SignedDst, bool Scalar)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Elems = (!Scalar ? 8 >> Op.Size : 1);
int ESize = 8 << Op.Size;
int Part = (!Scalar & (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0);
int TMaxValue = (SignedDst ? (1 << (ESize - 1)) - 1 : (int)((1L << ESize) - 1L));
int TMinValue = (SignedDst ? -((1 << (ESize - 1))) : 0);
for (int Index = 0; Index < Elems; Index++)
AILLabel LblLe = new AILLabel();
AILLabel LblGeEnd = new AILLabel();
EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc);
Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe);
Context.Emit(OpCodes.Br_S, LblGeEnd);
Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd);
if (Scalar)
EmitVectorZeroLower(Context, Op.Rd);
EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
if (Part == 0)
EmitVectorZeroUpper(Context, Op.Rd);
Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr));
Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr));
public static void Fabd_S(AILEmitterCtx Context)
EmitScalarBinaryOpF(Context, () =>
@ -338,7 +274,7 @@ namespace ChocolArm64.Instruction
int SizeF = Op.Size & 1;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> SizeF + 2;
int Half = Elems >> 1;
@ -399,98 +335,66 @@ namespace ChocolArm64.Instruction
public static void Fmax_S(AILEmitterCtx Context)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
EmitScalarBinaryOpF(Context, () =>
if (Op.Size == 0)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
else if (Op.Size == 1)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
throw new InvalidOperationException();
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max));
public static void Fmax_V(AILEmitterCtx Context)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
EmitVectorBinaryOpF(Context, () =>
if (Op.Size == 0)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
else if (Op.Size == 1)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
throw new InvalidOperationException();
public static void Fmin_S(AILEmitterCtx Context)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
EmitScalarBinaryOpF(Context, () =>
if (Op.Size == 0)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
else if (Op.Size == 1)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
throw new InvalidOperationException();
public static void Fmin_V(AILEmitterCtx Context)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
EmitVectorBinaryOpF(Context, () =>
if (SizeF == 0)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
else if (SizeF == 1)
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
throw new InvalidOperationException();
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max));
public static void Fmaxnm_S(AILEmitterCtx Context)
EmitScalarBinaryOpF(Context, () =>
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MaxNum));
public static void Fmaxnm_V(AILEmitterCtx Context)
EmitVectorBinaryOpF(Context, () =>
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MaxNum));
public static void Fmin_S(AILEmitterCtx Context)
EmitScalarBinaryOpF(Context, () =>
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min));
public static void Fmin_V(AILEmitterCtx Context)
EmitVectorBinaryOpF(Context, () =>
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min));
public static void Fminnm_S(AILEmitterCtx Context)
EmitScalarBinaryOpF(Context, () =>
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MinNum));
public static void Fminnm_V(AILEmitterCtx Context)
EmitVectorBinaryOpF(Context, () =>
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MinNum));
public static void Fmla_Se(AILEmitterCtx Context)
@ -870,7 +774,7 @@ namespace ChocolArm64.Instruction
int SizeF = Op.Size & 1;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
@ -1102,29 +1006,83 @@ namespace ChocolArm64.Instruction
public static void Smlsl_V(AILEmitterCtx Context)
EmitVectorWidenRnRmTernaryOpSx(Context, () =>
public static void Smull_V(AILEmitterCtx Context)
EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Mul));
public static void Sqabs_S(AILEmitterCtx Context)
EmitScalarSaturatingUnaryOpSx(Context, () => EmitAbs(Context));
public static void Sqabs_V(AILEmitterCtx Context)
EmitVectorSaturatingUnaryOpSx(Context, () => EmitAbs(Context));
public static void Sqadd_S(AILEmitterCtx Context)
EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Add);
public static void Sqadd_V(AILEmitterCtx Context)
EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Add);
public static void Sqneg_S(AILEmitterCtx Context)
EmitScalarSaturatingUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg));
public static void Sqneg_V(AILEmitterCtx Context)
EmitVectorSaturatingUnaryOpSx(Context, () => Context.Emit(OpCodes.Neg));
public static void Sqsub_S(AILEmitterCtx Context)
EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Sub);
public static void Sqsub_V(AILEmitterCtx Context)
EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Sub);
public static void Sqxtn_S(AILEmitterCtx Context)
EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: true);
EmitScalarSaturatingNarrowOpSxSx(Context, () => { });
public static void Sqxtn_V(AILEmitterCtx Context)
EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: false);
EmitVectorSaturatingNarrowOpSxSx(Context, () => { });
public static void Sqxtun_S(AILEmitterCtx Context)
EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: true);
EmitScalarSaturatingNarrowOpSxZx(Context, () => { });
public static void Sqxtun_V(AILEmitterCtx Context)
EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: false);
EmitVectorSaturatingNarrowOpSxZx(Context, () => { });
public static void Ssubw_V(AILEmitterCtx Context)
EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Sub));
public static void Sub_S(AILEmitterCtx Context)
@ -1149,6 +1107,16 @@ namespace ChocolArm64.Instruction
EmitHighNarrow(Context, () => Context.Emit(OpCodes.Sub), Round: false);
public static void Suqadd_S(AILEmitterCtx Context)
EmitScalarSaturatingBinaryOpSx(Context, SaturatingFlags.Accumulate);
public static void Suqadd_V(AILEmitterCtx Context)
EmitVectorSaturatingBinaryOpSx(Context, SaturatingFlags.Accumulate);
public static void Uaba_V(AILEmitterCtx Context)
EmitVectorTernaryOpZx(Context, () =>
@ -1198,11 +1166,12 @@ namespace ChocolArm64.Instruction
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
for (int Index = 1; Index < (Bytes >> Op.Size); Index++)
for (int Index = 1; Index < Elems; Index++)
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
@ -1270,14 +1239,49 @@ namespace ChocolArm64.Instruction
EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul));
public static void Uqadd_S(AILEmitterCtx Context)
EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Add);
public static void Uqadd_V(AILEmitterCtx Context)
EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Add);
public static void Uqsub_S(AILEmitterCtx Context)
EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Sub);
public static void Uqsub_V(AILEmitterCtx Context)
EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Sub);
public static void Uqxtn_S(AILEmitterCtx Context)
EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: true);
EmitScalarSaturatingNarrowOpZxZx(Context, () => { });
public static void Uqxtn_V(AILEmitterCtx Context)
EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: false);
EmitVectorSaturatingNarrowOpZxZx(Context, () => { });
public static void Usqadd_S(AILEmitterCtx Context)
EmitScalarSaturatingBinaryOpZx(Context, SaturatingFlags.Accumulate);
public static void Usqadd_V(AILEmitterCtx Context)
EmitVectorSaturatingBinaryOpZx(Context, SaturatingFlags.Accumulate);
public static void Usubw_V(AILEmitterCtx Context)
EmitVectorWidenRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
@ -363,8 +363,8 @@ namespace ChocolArm64.Instruction
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Elems = (!Scalar ? Bytes >> Op.Size : 1);
int Bytes = Op.GetBitsCount() >> 3;
int Elems = !Scalar ? Bytes >> Op.Size : 1;
ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size));
@ -407,8 +407,8 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Elems = (!Scalar ? Bytes >> Op.Size : 1);
int Bytes = Op.GetBitsCount() >> 3;
int Elems = !Scalar ? Bytes >> Op.Size : 1;
ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size));
@ -454,7 +454,7 @@ namespace ChocolArm64.Instruction
int SizeF = Op.Size & 1;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
@ -337,7 +337,7 @@ namespace ChocolArm64.Instruction
int FBits = GetFBits(Context);
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
for (int Index = 0; Index < (Bytes >> SizeI); Index++)
@ -426,7 +426,7 @@ namespace ChocolArm64.Instruction
int FBits = GetFBits(Context);
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
for (int Index = 0; Index < (Bytes >> SizeI); Index++)
@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
@ -335,17 +336,21 @@ namespace ChocolArm64.Instruction
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
if (Opers.HasFlag(OperFlags.Rd))
bool Rd = (Opers & OperFlags.Rd) != 0;
bool Rn = (Opers & OperFlags.Rn) != 0;
bool Rm = (Opers & OperFlags.Rm) != 0;
if (Rd)
EmitVectorExtract(Context, Op.Rd, 0, Op.Size, Signed);
if (Opers.HasFlag(OperFlags.Rn))
if (Rn)
EmitVectorExtract(Context, Op.Rn, 0, Op.Size, Signed);
if (Opers.HasFlag(OperFlags.Rm))
if (Rm)
EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, 0, Op.Size, Signed);
@ -376,17 +381,21 @@ namespace ChocolArm64.Instruction
int SizeF = Op.Size & 1;
if (Opers.HasFlag(OperFlags.Ra))
bool Ra = (Opers & OperFlags.Ra) != 0;
bool Rn = (Opers & OperFlags.Rn) != 0;
bool Rm = (Opers & OperFlags.Rm) != 0;
if (Ra)
EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Ra, 0, SizeF);
if (Opers.HasFlag(OperFlags.Rn))
if (Rn)
EmitVectorExtractF(Context, Op.Rn, 0, SizeF);
if (Opers.HasFlag(OperFlags.Rm))
if (Rm)
EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Rm, 0, SizeF);
@ -417,21 +426,26 @@ namespace ChocolArm64.Instruction
int SizeF = Op.Size & 1;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> SizeF + 2;
for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++)
bool Rd = (Opers & OperFlags.Rd) != 0;
bool Rn = (Opers & OperFlags.Rn) != 0;
bool Rm = (Opers & OperFlags.Rm) != 0;
for (int Index = 0; Index < Elems; Index++)
if (Opers.HasFlag(OperFlags.Rd))
if (Rd)
EmitVectorExtractF(Context, Op.Rd, Index, SizeF);
if (Opers.HasFlag(OperFlags.Rn))
if (Rn)
EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
if (Opers.HasFlag(OperFlags.Rm))
if (Rm)
EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Rm, Index, SizeF);
@ -467,9 +481,10 @@ namespace ChocolArm64.Instruction
int SizeF = Op.Size & 1;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> SizeF + 2;
for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++)
for (int Index = 0; Index < Elems; Index++)
if (Ternary)
@ -527,21 +542,26 @@ namespace ChocolArm64.Instruction
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
bool Rd = (Opers & OperFlags.Rd) != 0;
bool Rn = (Opers & OperFlags.Rn) != 0;
bool Rm = (Opers & OperFlags.Rm) != 0;
for (int Index = 0; Index < Elems; Index++)
if (Opers.HasFlag(OperFlags.Rd))
if (Rd)
EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed);
if (Opers.HasFlag(OperFlags.Rn))
if (Rn)
EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
if (Opers.HasFlag(OperFlags.Rm))
if (Rm)
EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed);
@ -582,9 +602,10 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
for (int Index = 0; Index < Elems; Index++)
if (Ternary)
@ -622,9 +643,10 @@ namespace ChocolArm64.Instruction
AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
for (int Index = 0; Index < Elems; Index++)
if (Binary)
@ -658,9 +680,6 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Elems = 8 >> Op.Size;
int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
@ -703,9 +722,6 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Elems = 8 >> Op.Size;
int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
@ -739,24 +755,28 @@ namespace ChocolArm64.Instruction
EmitVectorPairwiseOp(Context, Emit, false);
private static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed)
public static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed)
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Words = Op.GetBitsCount() >> 4;
int Pairs = Words >> Op.Size;
int Elems = Bytes >> Op.Size;
int Half = Elems >> 1;
for (int Index = 0; Index < Elems; Index++)
for (int Index = 0; Index < Pairs; Index++)
int Elem = (Index & (Half - 1)) << 1;
int Idx = Index << 1;
EmitVectorExtract(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 0, Op.Size, Signed);
EmitVectorExtract(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 1, Op.Size, Signed);
EmitVectorExtract(Context, Op.Rn, Idx, Op.Size, Signed);
EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed);
EmitVectorExtract(Context, Op.Rm, Idx, Op.Size, Signed);
EmitVectorExtract(Context, Op.Rm, Idx + 1, Op.Size, Signed);
EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
@ -769,6 +789,383 @@ namespace ChocolArm64.Instruction
public enum SaturatingFlags
Scalar = 1 << 0,
Signed = 1 << 1,
Add = 1 << 2,
Sub = 1 << 3,
Accumulate = 1 << 4,
ScalarSx = Scalar | Signed,
ScalarZx = Scalar,
VectorSx = Signed,
VectorZx = 0,
public static void EmitScalarSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit)
EmitSaturatingUnaryOpSx(Context, Emit, SaturatingFlags.ScalarSx);
public static void EmitVectorSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit)
EmitSaturatingUnaryOpSx(Context, Emit, SaturatingFlags.VectorSx);
public static void EmitSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit, SaturatingFlags Flags)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
bool Scalar = (Flags & SaturatingFlags.Scalar) != 0;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = !Scalar ? Bytes >> Op.Size : 1;
if (Scalar)
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtractSx(Context, Op.Rn, Index, Op.Size);
EmitUnarySignedSatQAbsOrNeg(Context, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar)
EmitVectorZeroUpper(Context, Op.Rd);
public static void EmitScalarSaturatingBinaryOpSx(AILEmitterCtx Context, SaturatingFlags Flags)
EmitSaturatingBinaryOp(Context, SaturatingFlags.ScalarSx | Flags);
public static void EmitScalarSaturatingBinaryOpZx(AILEmitterCtx Context, SaturatingFlags Flags)
EmitSaturatingBinaryOp(Context, SaturatingFlags.ScalarZx | Flags);
public static void EmitVectorSaturatingBinaryOpSx(AILEmitterCtx Context, SaturatingFlags Flags)
EmitSaturatingBinaryOp(Context, SaturatingFlags.VectorSx | Flags);
public static void EmitVectorSaturatingBinaryOpZx(AILEmitterCtx Context, SaturatingFlags Flags)
EmitSaturatingBinaryOp(Context, SaturatingFlags.VectorZx | Flags);
// Emits IL for a saturating binary SIMD operation. Flags selects scalar/vector form,
// signedness, and whether the operation is an add, a subtract or an accumulate.
// Add/Sub: operands are the Rn and Rm lanes, both extracted with the same signedness.
// Accumulate: operands are the Rn lane (extracted with the opposite signedness) and the Rd lane.
// Element sizes 0..2 go through EmitSatQ; size 3 (64-bit) uses the EmitBinarySatQ* helpers.
// NOTE(review): the `if (Scalar)` guard has no visible body here, and in the accumulate
// path for sizes <= 2 no add instruction is visibly emitted before EmitSatQ — confirm
// against the complete file.
public static void EmitSaturatingBinaryOp(AILEmitterCtx Context, SaturatingFlags Flags)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
bool Scalar = (Flags & SaturatingFlags.Scalar) != 0;
bool Signed = (Flags & SaturatingFlags.Signed) != 0;
bool Add = (Flags & SaturatingFlags.Add) != 0;
bool Sub = (Flags & SaturatingFlags.Sub) != 0;
bool Accumulate = (Flags & SaturatingFlags.Accumulate) != 0;
// Register width in bytes; the scalar form always processes exactly one element.
int Bytes = Op.GetBitsCount() >> 3;
int Elems = !Scalar ? Bytes >> Op.Size : 1;
if (Scalar)
if (Add || Sub)
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed);
if (Op.Size <= 2)
Context.Emit(Add ? OpCodes.Add : OpCodes.Sub);
EmitSatQ(Context, Op.Size, true, Signed);
else /* if (Op.Size == 3) */
if (Add)
EmitBinarySatQAdd(Context, Signed);
else /* if (Sub) */
EmitBinarySatQSub(Context, Signed);
EmitVectorInsertTmp(Context, Index, Op.Size);
else if (Accumulate)
for (int Index = 0; Index < Elems; Index++)
// The two operands are deliberately extracted with opposite signedness.
EmitVectorExtract(Context, Op.Rn, Index, Op.Size, !Signed);
EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed);
if (Op.Size <= 2)
EmitSatQ(Context, Op.Size, true, Signed);
else /* if (Op.Size == 3) */
EmitBinarySatQAccumulate(Context, Signed);
EmitVectorInsertTmp(Context, Index, Op.Size);
// 64-bit and scalar forms clear the upper half of the destination register.
if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar)
EmitVectorZeroUpper(Context, Op.Rd);
// Bit flags describing a saturating narrowing operation: scalar vs. vector form,
// and the signedness of the source and destination elements.
// In the combined names the first Sx/Zx refers to the source, the second to the destination.
public enum SaturatingNarrowFlags
{
    // Individual bits.
    Scalar    = 0x01,
    SignedSrc = 0x02,
    SignedDst = 0x04,

    // Scalar combinations.
    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxSx = Scalar | SignedDst,
    ScalarZxZx = Scalar,

    // Vector combinations.
    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxSx = SignedDst,
    VectorZxZx = 0
}
// Scalar narrowing, signed source to signed destination.
public static void EmitScalarSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarSxSx);
// Scalar narrowing, signed source to unsigned destination.
public static void EmitScalarSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarSxZx);
// Scalar narrowing, unsigned source to signed destination.
public static void EmitScalarSaturatingNarrowOpZxSx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarZxSx);
// Scalar narrowing, unsigned source to unsigned destination.
public static void EmitScalarSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarZxZx);
// Vector narrowing, signed source to signed destination.
public static void EmitVectorSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorSxSx);
// Vector narrowing, signed source to unsigned destination.
public static void EmitVectorSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorSxZx);
// Vector narrowing, unsigned source to signed destination.
public static void EmitVectorSaturatingNarrowOpZxSx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorZxSx);
// Vector narrowing, unsigned source to unsigned destination.
public static void EmitVectorSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit)
    => EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorZxZx);
// Emits IL for a saturating narrowing SIMD operation. Flags selects scalar/vector form
// and the signedness of source and destination elements.
// Source lanes are read from Rn at element size Op.Size + 1, saturated to Op.Size via
// EmitSatQ, and written to the temporary vector at index Part + Index.
// NOTE(review): the Emit callback is not invoked in the lines visible here, and the
// `if (Scalar)` / `if (Part != 0)` guards have no visible bodies — confirm against the
// complete file.
public static void EmitSaturatingNarrowOp(AILEmitterCtx Context, Action Emit, SaturatingNarrowFlags Flags)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
bool Scalar = (Flags & SaturatingNarrowFlags.Scalar) != 0;
bool SignedSrc = (Flags & SaturatingNarrowFlags.SignedSrc) != 0;
bool SignedDst = (Flags & SaturatingNarrowFlags.SignedDst) != 0;
// Vector form produces 8 bytes worth of narrowed elements; scalar form produces one.
int Elems = !Scalar ? 8 >> Op.Size : 1;
// Destination offset: Elems for a 128-bit vector operation, otherwise 0.
int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0;
if (Scalar)
if (Part != 0)
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc);
EmitSatQ(Context, Op.Size, SignedSrc, SignedDst);
EmitVectorInsertTmp(Context, Part + Index, Op.Size);
// When writing from index 0, the upper half of the destination is cleared.
if (Part == 0)
EmitVectorZeroUpper(Context, Op.Rd);
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
// Emits a call to the ASoftFallback saturation helper that matches the requested
// source/destination signedness.
// Throws ArgumentOutOfRangeException when SizeDst is greater than 2.
// NOTE(review): both helper selections appear back to back here; presumably the second
// belongs to the `else` of `if (SignedSrc)`, and any argument setup before the calls is
// not visible — confirm against the complete file.
public static void EmitSatQ(
AILEmitterCtx Context,
int SizeDst,
bool SignedSrc,
bool SignedDst)
if (SizeDst > 2)
throw new ArgumentOutOfRangeException(nameof(SizeDst));
if (SignedSrc)
ASoftFallback.EmitCall(Context, SignedDst
? nameof(ASoftFallback.SignedSrcSignedDstSatQ)
: nameof(ASoftFallback.SignedSrcUnsignedDstSatQ));
ASoftFallback.EmitCall(Context, SignedDst
? nameof(ASoftFallback.UnsignedSrcSignedDstSatQ)
: nameof(ASoftFallback.UnsignedSrcUnsignedDstSatQ));
// TSrc (8bit, 16bit, 32bit, 64bit) == TDst (8bit, 16bit, 32bit, 64bit); signed.
// Computes the signed maximum and minimum values for an element of (8 << Size) bits
// and emits a conditional branch to LblFalse.
// NOTE(review): the IL that uses TMaxValue/TMinValue and marks LblFalse is not visible
// in these lines — confirm against the complete file.
public static void EmitUnarySignedSatQAbsOrNeg(AILEmitterCtx Context, int Size)
// Element width in bits.
int ESize = 8 << Size;
long TMaxValue = (1L << (ESize - 1)) - 1L;
long TMinValue = -(1L << (ESize - 1));
AILLabel LblFalse = new AILLabel();
Context.Emit(OpCodes.Brfalse_S, LblFalse);
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
// Emits a call to the signed or unsigned 64-bit saturating-add fallback.
// Throws InvalidOperationException when the current opcode's Size is below 3.
// NOTE(review): any argument setup preceding the call is not visible here.
public static void EmitBinarySatQAdd(AILEmitterCtx Context, bool Signed)
if (((AOpCodeSimdReg)Context.CurrOp).Size < 3)
throw new InvalidOperationException();
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.BinarySignedSatQAdd)
: nameof(ASoftFallback.BinaryUnsignedSatQAdd));
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
// Emits a call to the signed or unsigned 64-bit saturating-subtract fallback.
// Throws InvalidOperationException when the current opcode's Size is below 3.
// NOTE(review): any argument setup preceding the call is not visible here.
public static void EmitBinarySatQSub(AILEmitterCtx Context, bool Signed)
if (((AOpCodeSimdReg)Context.CurrOp).Size < 3)
throw new InvalidOperationException();
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.BinarySignedSatQSub)
: nameof(ASoftFallback.BinaryUnsignedSatQSub));
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
// Emits a call to the signed or unsigned 64-bit saturating-accumulate fallback.
// Throws InvalidOperationException when the current opcode's Size is below 3.
// Unlike the Add/Sub variants, the opcode is cast to AOpCodeSimd rather than AOpCodeSimdReg.
// NOTE(review): any argument setup preceding the call is not visible here.
public static void EmitBinarySatQAccumulate(AILEmitterCtx Context, bool Signed)
if (((AOpCodeSimd)Context.CurrOp).Size < 3)
throw new InvalidOperationException();
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.BinarySignedSatQAcc)
: nameof(ASoftFallback.BinaryUnsignedSatQAcc));
// Emits IL that reads AThreadState.Fpsr, loads the mask (1 << 27) and writes Fpsr back.
// NOTE(review): the instruction that combines the loaded value with the mask (and the
// load of the thread-state instance) is not visible in these lines — confirm against
// the complete file.
public static void EmitSetFpsrQCFlag(AILEmitterCtx Context)
// Bit position of the QC flag within Fpsr.
const int QCFlagBit = 27;
Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr));
Context.EmitLdc_I4(1 << QCFlagBit);
Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr));
public static void EmitScalarSet(AILEmitterCtx Context, int Reg, int Size)
EmitVectorZeroAll(Context, Reg);
@ -838,6 +1235,11 @@ namespace ChocolArm64.Instruction
EmitVectorInsert(Context, Rd, 0, 3, 0);
public static void EmitVectorZeroLowerTmp(AILEmitterCtx Context)
EmitVectorInsertTmp(Context, 0, 3, 0);
public static void EmitVectorZeroUpper(AILEmitterCtx Context, int Rd)
EmitVectorInsert(Context, Rd, 1, 3, 0);
@ -883,6 +1285,20 @@ namespace ChocolArm64.Instruction
public static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size, long Value)
ThrowIfInvalid(Index, Size);
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
public static void EmitVectorInsertF(AILEmitterCtx Context, int Reg, int Index, int Size)
ThrowIfInvalidF(Index, Size);
@ -55,7 +55,7 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < Elems; Index++)
@ -195,7 +195,7 @@ namespace ChocolArm64.Instruction
throw new InvalidOperationException();
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
int ContainerMask = (1 << (ContainerSize - Op.Size)) - 1;
@ -105,13 +105,14 @@ namespace ChocolArm64.Instruction
throw new InvalidOperationException();
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int SElem = 0; SElem < Op.SElems; SElem++)
int Rt = (Op.Rt + SElem) & 0x1f;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
for (int Index = 0; Index < Elems; Index++)
@ -14,9 +14,10 @@ namespace ChocolArm64.Instruction
AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
for (int Index = 0; Index < Elems; Index++)
@ -42,9 +43,10 @@ namespace ChocolArm64.Instruction
AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, Op.Size);
@ -64,7 +66,7 @@ namespace ChocolArm64.Instruction
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Position = Op.Imm4;
@ -293,13 +295,22 @@ namespace ChocolArm64.Instruction
int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
if (Part != 0)
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
EmitVectorInsertTmp(Context, Part + Index, Op.Size);
if (Part == 0)
EmitVectorZeroUpper(Context, Op.Rd);
@ -329,17 +340,18 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Words = Op.GetBitsCount() >> 4;
int Pairs = Words >> Op.Size;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < Elems; Index++)
for (int Index = 0; Index < Pairs; Index++)
int Elem = (Index & ~1) + Part;
int Idx = Index << 1;
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
EmitVectorInsertTmp(Context, Idx, Op.Size);
@ -355,17 +367,17 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Words = Op.GetBitsCount() >> 4;
int Pairs = Words >> Op.Size;
int Elems = Bytes >> Op.Size;
int Half = Elems >> 1;
for (int Index = 0; Index < Elems; Index++)
for (int Index = 0; Index < Pairs; Index++)
int Elem = Part + ((Index & (Half - 1)) << 1);
int Idx = Index << 1;
EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size);
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
@ -382,18 +394,20 @@ namespace ChocolArm64.Instruction
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Words = Op.GetBitsCount() >> 4;
int Pairs = Words >> Op.Size;
int Elems = Bytes >> Op.Size;
int Half = Elems >> 1;
int Base = Part != 0 ? Pairs : 0;
for (int Index = 0; Index < Elems; Index++)
for (int Index = 0; Index < Pairs; Index++)
int Elem = Part * Half + (Index >> 1);
int Idx = Index << 1;
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size);
EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
EmitVectorInsertTmp(Context, Idx, Op.Size);
@ -27,9 +27,7 @@ namespace ChocolArm64.Instruction
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = Op.Imm - (8 << Op.Size);
EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift);
EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op));
public static void Shll_V(AILEmitterCtx Context)
@ -45,22 +43,21 @@ namespace ChocolArm64.Instruction
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = (8 << (Op.Size + 1)) - Op.Imm;
EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), Shift);
EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), GetImmShr(Op));
public static void Sli_V(AILEmitterCtx Context)
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
int Shift = Op.Imm - (8 << Op.Size);
int Shift = GetImmShl(Op);
ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
for (int Index = 0; Index < Elems; Index++)
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
@ -84,6 +81,39 @@ namespace ChocolArm64.Instruction
public static void Sqrshrn_V(AILEmitterCtx Context)
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = GetImmShr(Op);
long RoundConst = 1L << (Shift - 1);
Action Emit = () =>
EmitVectorSaturatingNarrowOpSxSx(Context, Emit);
public static void Srshr_V(AILEmitterCtx Context)
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = GetImmShr(Op);
long RoundConst = 1L << (Shift - 1);
EmitVectorRoundShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift, RoundConst);
public static void Sshl_V(AILEmitterCtx Context)
EmitVectorShl(Context, Signed: true);
@ -93,9 +123,7 @@ namespace ChocolArm64.Instruction
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = Op.Imm - (8 << Op.Size);
EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), Shift);
EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op));
public static void Sshr_S(AILEmitterCtx Context)
@ -115,24 +143,20 @@ namespace ChocolArm64.Instruction
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = (8 << (Op.Size + 1)) - Op.Imm;
EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift);
EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), GetImmShr(Op));
public static void Ssra_V(AILEmitterCtx Context)
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = (8 << (Op.Size + 1)) - Op.Imm;
Action Emit = () =>
EmitVectorShImmTernarySx(Context, Emit, Shift);
EmitVectorShImmTernarySx(Context, Emit, GetImmShr(Op));
public static void Ushl_V(AILEmitterCtx Context)
@ -144,9 +168,7 @@ namespace ChocolArm64.Instruction
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
int Shift = Op.Imm - (8 << Op.Size);
EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift);
EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op));
public static void Ushr_S(AILEmitterCtx Context)
@ -251,28 +273,51 @@ namespace ChocolArm64.Instruction
private enum ShImmFlags
None = 0,
Signed = 1 << 0,
Ternary = 1 << 1,
Rounded = 1 << 2,
SignedTernary = Signed | Ternary,
SignedRounded = Signed | Rounded
private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
EmitVectorShImmOp(Context, Emit, Imm, false, true);
EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.Signed);
private static void EmitVectorShImmTernarySx(AILEmitterCtx Context, Action Emit, int Imm)
EmitVectorShImmOp(Context, Emit, Imm, true, true);
EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedTernary);
private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
EmitVectorShImmOp(Context, Emit, Imm, false, false);
EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.None);
private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, bool Ternary, bool Signed)
private static void EmitVectorRoundShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm, long Rc)
EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedRounded, Rc);
private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, ShImmFlags Flags, long Rc = 0)
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
bool Signed = (Flags & ShImmFlags.Signed) != 0;
bool Ternary = (Flags & ShImmFlags.Ternary) != 0;
bool Rounded = (Flags & ShImmFlags.Rounded) != 0;
for (int Index = 0; Index < Elems; Index++)
if (Ternary)
@ -281,6 +326,13 @@ namespace ChocolArm64.Instruction
EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
if (Rounded)
@ -1,3 +1,4 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
@ -10,26 +11,313 @@ namespace ChocolArm64.Instruction
Context.EmitCall(typeof(ASoftFallback), MthdName);
public static ulong CountLeadingSigns(ulong Value, int Size)
public static long BinarySignedSatQAdd(long op1, long op2, AThreadState State)
return CountLeadingZeros((Value >> 1) ^ Value, Size - 1);
long Add = op1 + op2;
if ((~(op1 ^ op2) & (op1 ^ Add)) < 0L)
if (op1 < 0L)
return long.MinValue;
return long.MaxValue;
return Add;
public static ulong CountLeadingZeros(ulong Value, int Size)
public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, AThreadState State)
int HighBit = Size - 1;
ulong Add = op1 + op2;
if ((Add < op1) && (Add < op2))
return ulong.MaxValue;
return Add;
// Signed 64-bit saturating subtraction: returns op1 - op2, or long.MinValue / long.MaxValue
// when the subtraction overflows (direction chosen by the sign of op1).
// NOTE(review): State is not referenced in the lines visible here, and the second
// saturation return presumably sits in an `else` — confirm against the complete file.
public static long BinarySignedSatQSub(long op1, long op2, AThreadState State)
long Sub = op1 - op2;
// Overflow: operands differ in sign and the result's sign differs from op1's.
if (((op1 ^ op2) & (op1 ^ Sub)) < 0L)
if (op1 < 0L)
return long.MinValue;
return long.MaxValue;
return Sub;
// Unsigned 64-bit saturating subtraction: returns op1 - op2, or ulong.MinValue (0)
// when op1 is smaller than op2.
// NOTE(review): State is not referenced in the lines visible here — confirm against
// the complete file.
public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, AThreadState State)
ulong Sub = op1 - op2;
if (op1 < op2)
return ulong.MinValue;
return Sub;
// Saturating accumulate with a signed 64-bit result: adds the unsigned op1 to the
// signed op2 and clamps the result to long.MaxValue when it does not fit.
// The three cases are split on whether op1 fits in a long and on the sign of op2.
// NOTE(review): State is not referenced in the lines visible here, and the final case
// presumably sits in an `else` — confirm against the complete file.
public static long BinarySignedSatQAcc(ulong op1, long op2, AThreadState State)
if (op1 <= (ulong)long.MaxValue)
// op1 from ulong.MinValue to (ulong)long.MaxValue
// op2 from long.MinValue to long.MaxValue
long Add = (long)op1 + op2;
// Positive overflow: op2 is non-negative but the sum came out negative.
if ((~op2 & Add) < 0L)
return long.MaxValue;
return Add;
else if (op2 >= 0L)
// op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
// op2 from (long)ulong.MinValue to long.MaxValue
return long.MaxValue;
// op1 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
// op2 from long.MinValue to (long)ulong.MinValue - 1L
ulong Add = op1 + (ulong)op2;
if (Add > (ulong)long.MaxValue)
return long.MaxValue;
return (long)Add;
// Saturating accumulate with an unsigned 64-bit result: adds the signed op1 to the
// unsigned op2 and clamps the result to ulong.MaxValue or ulong.MinValue (0).
// The three cases are split on the sign of op1 and on whether op2 fits in a long.
// NOTE(review): State is not referenced in the lines visible here, and the final case
// presumably sits in an `else` — confirm against the complete file.
public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, AThreadState State)
if (op1 >= 0L)
// op1 from (long)ulong.MinValue to long.MaxValue
// op2 from ulong.MinValue to ulong.MaxValue
ulong Add = (ulong)op1 + op2;
// Unsigned wrap-around: the sum is smaller than both operands.
if ((Add < (ulong)op1) && (Add < op2))
return ulong.MaxValue;
return Add;
else if (op2 > (ulong)long.MaxValue)
// op1 from long.MinValue to (long)ulong.MinValue - 1L
// op2 from (ulong)long.MaxValue + 1UL to ulong.MaxValue
return (ulong)op1 + op2;
// op1 from long.MinValue to (long)ulong.MinValue - 1L
// op2 from ulong.MinValue to (ulong)long.MaxValue
long Add = op1 + (long)op2;
if (Add < (long)ulong.MinValue)
return ulong.MinValue;
return (ulong)Add;
// Clamps a signed value to the signed range of an element that is (8 << Size) bits wide.
// Returns the upper or lower bound when op lies outside that range, otherwise op itself.
// NOTE(review): State is not referenced in the lines visible here — confirm against
// the complete file.
public static long SignedSrcSignedDstSatQ(long op, int Size, AThreadState State)
// Destination element width in bits.
int ESize = 8 << Size;
long TMaxValue = (1L << (ESize - 1)) - 1L;
long TMinValue = -(1L << (ESize - 1));
if (op > TMaxValue)
return TMaxValue;
else if (op < TMinValue)
return TMinValue;
return op;
// Clamps a signed value to the unsigned range of an element that is (8 << Size) bits wide:
// values above the maximum return the maximum, negative values return 0.
// NOTE(review): State is not referenced in the lines visible here — confirm against
// the complete file.
public static ulong SignedSrcUnsignedDstSatQ(long op, int Size, AThreadState State)
// Destination element width in bits.
int ESize = 8 << Size;
ulong TMaxValue = (1UL << ESize) - 1UL;
ulong TMinValue = 0UL;
if (op > (long)TMaxValue)
return TMaxValue;
else if (op < (long)TMinValue)
return TMinValue;
return (ulong)op;
// Clamps an unsigned value to the signed maximum of an element that is (8 << Size) bits
// wide; an unsigned source can only exceed the upper bound, so only that is checked.
// NOTE(review): State is not referenced in the lines visible here — confirm against
// the complete file.
public static long UnsignedSrcSignedDstSatQ(ulong op, int Size, AThreadState State)
// Destination element width in bits.
int ESize = 8 << Size;
long TMaxValue = (1L << (ESize - 1)) - 1L;
if (op > (ulong)TMaxValue)
return TMaxValue;
return (long)op;
// Clamps an unsigned value to the unsigned maximum of an element that is (8 << Size)
// bits wide.
// NOTE(review): State is not referenced in the lines visible here — confirm against
// the complete file.
public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int Size, AThreadState State)
// Destination element width in bits.
int ESize = 8 << Size;
ulong TMaxValue = (1UL << ESize) - 1UL;
if (op > TMaxValue)
return TMaxValue;
return op;
// Sets bit 27 (named QCFlagBit here) in the Fpsr value of the given thread state,
// leaving all other bits unchanged.
private static void SetFpsrQCFlag(AThreadState State)
const int QCFlagBit = 27;
State.Fpsr |= 1 << QCFlagBit;
public static ulong CountLeadingSigns(ulong Value, int Size)
Value ^= Value >> 1;
int HighBit = Size - 2;
for (int Bit = HighBit; Bit >= 0; Bit--)
if (((Value >> Bit) & 1) != 0)
if (((Value >> Bit) & 0b1) != 0)
return (ulong)(HighBit - Bit);
return (ulong)(Size - 1);
// Leading-zero count of every possible 4-bit value, indexed by that value.
private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

// Counts the leading zero bits of Value when treated as a Size-bit quantity.
// The value is scanned one nibble at a time from the top; scanning stops at the
// first nibble that is not entirely zero. A zero input yields Size.
public static ulong CountLeadingZeros(ulong Value, int Size)
{
    if (Value == 0)
    {
        return (ulong)Size;
    }

    int shift = Size;
    int total = 0;

    while (true)
    {
        shift -= 4;

        int nibbleZeros = ClzNibbleTbl[(Value >> shift) & 0b1111];

        total += nibbleZeros;

        // A nibble with fewer than four leading zeros contains the highest set bit.
        if (nibbleZeros != 4)
        {
            break;
        }
    }

    return (ulong)total;
}
public static uint CountSetBits8(uint Value)
Value = ((Value >> 1) & 0x55) + (Value & 0x55);
@ -168,9 +456,10 @@ namespace ChocolArm64.Instruction
public static long SMulHi128(long LHS, long RHS)
long Result = (long)UMulHi128((ulong)(LHS), (ulong)(RHS));
long Result = (long)UMulHi128((ulong)LHS, (ulong)RHS);
if (LHS < 0) Result -= RHS;
if (RHS < 0) Result -= LHS;
return Result;
@ -187,6 +476,7 @@ namespace ChocolArm64.Instruction
ulong Z1 = T & 0xFFFFFFFF;
ulong Z0 = T >> 32;
Z1 += LLow * RHigh;
return LHigh * RHigh + Z0 + (Z1 >> 32);
@ -79,7 +79,7 @@ namespace ChocolArm64.Instruction
if (scaled == 0)
// Zero -> Infinity
return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000));
return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7FF0000000000000));
// Denormal
@ -94,7 +94,7 @@ namespace ChocolArm64.Instruction
if (x_sign != 0)
// Negative -> NaN
return BitConverter.Int64BitsToDouble((long)0x7ff8000000000000);
return BitConverter.Int64BitsToDouble((long)0x7FF8000000000000);
if (x_exp == 0x7ff && scaled == 0)
@ -153,7 +153,7 @@ namespace ChocolArm64.Instruction
if (scaled == 0)
// Zero -> Infinity
return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000));
return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7FF0000000000000));
// Denormal
@ -208,8 +208,8 @@ namespace ChocolArm64.Instruction
ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF;
ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF;
bool inf1 = op1_other == 0x7ff0000000000000;
bool inf2 = op2_other == 0x7ff0000000000000;
bool inf1 = op1_other == 0x7FF0000000000000;
bool inf2 = op2_other == 0x7FF0000000000000;
bool zero1 = op1_other == 0;
bool zero2 = op2_other == 0;
@ -220,7 +220,7 @@ namespace ChocolArm64.Instruction
else if (inf1 || inf2)
// Infinity
return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign)));
return BitConverter.Int64BitsToDouble((long)(0x7FF0000000000000 | (op1_sign ^ op2_sign)));
return 2.0 + op1 * op2;
@ -261,5 +261,277 @@ namespace ChocolArm64.Instruction
uint new_exp = (uint)((exponent + 127) & 0xFF) << 23;
return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | new_exp | (x_mantissa << 13)));
// Single-precision "maximum number": when exactly one operand is a quiet NaN it is
// replaced by negative infinity so that the other operand wins; the result is then
// whatever Max returns for the (possibly adjusted) pair.
public static float MaxNum(float op1, float op2)
{
    bool firstIsQuietNaN  = IsQNaN((uint)BitConverter.SingleToInt32Bits(op1));
    bool secondIsQuietNaN = IsQNaN((uint)BitConverter.SingleToInt32Bits(op2));

    // Only act when exactly one of the two operands is a quiet NaN.
    if (firstIsQuietNaN != secondIsQuietNaN)
    {
        if (firstIsQuietNaN)
        {
            op1 = float.NegativeInfinity;
        }
        else
        {
            op2 = float.NegativeInfinity;
        }
    }

    return Max(op1, op2);
}
// Double-precision "maximum number": when exactly one operand is a quiet NaN it is
// replaced by negative infinity so that the other operand wins; the result is then
// whatever Max returns for the (possibly adjusted) pair.
public static double MaxNum(double op1, double op2)
{
    bool firstIsQuietNaN  = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op1));
    bool secondIsQuietNaN = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op2));

    // Only act when exactly one of the two operands is a quiet NaN.
    if (firstIsQuietNaN != secondIsQuietNaN)
    {
        if (firstIsQuietNaN)
        {
            op1 = double.NegativeInfinity;
        }
        else
        {
            op2 = double.NegativeInfinity;
        }
    }

    return Max(op1, op2);
}
// Single-precision maximum with explicit NaN and signed-zero handling.
// Ordinary comparisons are resolved first; what remains is a NaN operand
// (delegated to ProcessNaNs) or a pair of zeros, where -0 is returned only
// if both operands are -0.
public static float Max(float op1, float op2)
{
    // Fast path: ordered, distinguishable operands.
    if (op1 > op2)
    {
        return op1;
    }

    bool equalAndNonZero = op1 == op2 && op2 != 0;

    if (op1 < op2 || equalAndNonZero)
    {
        return op2;
    }

    uint firstBits  = (uint)BitConverter.SingleToInt32Bits(op1);
    uint secondBits = (uint)BitConverter.SingleToInt32Bits(op2);

    // NaN operands decide the result on their own.
    if (ProcessNaNs(firstBits, secondBits, out uint nanBits))
    {
        return BitConverter.Int32BitsToSingle((int)nanBits);
    }

    // Both operands are zeros: the most positive one wins.
    bool bothNegativeZero = (firstBits & secondBits) == 0x80000000u;

    return bothNegativeZero ? BitConverter.Int32BitsToSingle(int.MinValue) : 0;
}
// Double-precision maximum with explicit NaN and signed-zero handling.
// Ordinary comparisons are resolved first; what remains is a NaN operand
// (delegated to ProcessNaNs) or a pair of zeros, where -0 is returned only
// if both operands are -0.
public static double Max(double op1, double op2)
{
    // Fast path: ordered, distinguishable operands.
    if (op1 > op2)
    {
        return op1;
    }

    bool equalAndNonZero = op1 == op2 && op2 != 0;

    if (op1 < op2 || equalAndNonZero)
    {
        return op2;
    }

    ulong firstBits  = (ulong)BitConverter.DoubleToInt64Bits(op1);
    ulong secondBits = (ulong)BitConverter.DoubleToInt64Bits(op2);

    // NaN operands decide the result on their own.
    if (ProcessNaNs(firstBits, secondBits, out ulong nanBits))
    {
        return BitConverter.Int64BitsToDouble((long)nanBits);
    }

    // Both operands are zeros: the most positive one wins.
    bool bothNegativeZero = (firstBits & secondBits) == 0x8000000000000000ul;

    return bothNegativeZero ? BitConverter.Int64BitsToDouble(long.MinValue) : 0;
}
// Single-precision "minimum number": when exactly one operand is a quiet NaN it is
// replaced by positive infinity so that the other (numeric) operand wins, and the
// pair is then reduced with Min.
// Fix: the reduction previously called Max, so the function returned the larger
// operand; it now calls Min, matching the double-precision overload.
public static float MinNum(float op1, float op2)
{
    uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1);
    uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2);

    if (IsQNaN(op1_bits) && !IsQNaN(op2_bits))
    {
        // A lone quiet NaN must lose against any number in a minimum.
        op1 = float.PositiveInfinity;
    }
    else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits))
    {
        op2 = float.PositiveInfinity;
    }

    return Min(op1, op2);
}
// Double-precision "minimum number": when exactly one operand is a quiet NaN it is
// replaced by positive infinity so that the other operand wins; the result is then
// whatever Min returns for the (possibly adjusted) pair.
public static double MinNum(double op1, double op2)
{
    bool firstIsQuietNaN  = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op1));
    bool secondIsQuietNaN = IsQNaN((ulong)BitConverter.DoubleToInt64Bits(op2));

    // Only act when exactly one of the two operands is a quiet NaN.
    if (firstIsQuietNaN != secondIsQuietNaN)
    {
        if (firstIsQuietNaN)
        {
            op1 = double.PositiveInfinity;
        }
        else
        {
            op2 = double.PositiveInfinity;
        }
    }

    return Min(op1, op2);
}
// Single-precision minimum with explicit NaN and signed-zero handling.
// Ordinary comparisons are resolved first; what remains is a NaN operand
// (delegated to ProcessNaNs) or a pair of zeros, where -0 is returned if
// either operand is -0.
public static float Min(float op1, float op2)
{
    // Fast path: ordered, distinguishable operands.
    if (op1 < op2)
    {
        return op1;
    }

    bool equalAndNonZero = op1 == op2 && op2 != 0;

    if (op1 > op2 || equalAndNonZero)
    {
        return op2;
    }

    uint firstBits  = (uint)BitConverter.SingleToInt32Bits(op1);
    uint secondBits = (uint)BitConverter.SingleToInt32Bits(op2);

    // NaN operands decide the result on their own.
    if (ProcessNaNs(firstBits, secondBits, out uint nanBits))
    {
        return BitConverter.Int32BitsToSingle((int)nanBits);
    }

    // Both operands are zeros: the most negative one wins.
    bool anyNegativeZero = (firstBits | secondBits) == 0x80000000u;

    return anyNegativeZero ? BitConverter.Int32BitsToSingle(int.MinValue) : 0;
}
// Double-precision minimum with explicit NaN and signed-zero handling.
// Ordinary comparisons are resolved first; what remains is a NaN operand
// (delegated to ProcessNaNs) or a pair of zeros, where -0 is returned if
// either operand is -0.
public static double Min(double op1, double op2)
{
    // Fast path: ordered, distinguishable operands.
    if (op1 < op2)
    {
        return op1;
    }

    bool equalAndNonZero = op1 == op2 && op2 != 0;

    if (op1 > op2 || equalAndNonZero)
    {
        return op2;
    }

    ulong firstBits  = (ulong)BitConverter.DoubleToInt64Bits(op1);
    ulong secondBits = (ulong)BitConverter.DoubleToInt64Bits(op2);

    // NaN operands decide the result on their own.
    if (ProcessNaNs(firstBits, secondBits, out ulong nanBits))
    {
        return BitConverter.Int64BitsToDouble((long)nanBits);
    }

    // Both operands are zeros: the most negative one wins.
    bool anyNegativeZero = (firstBits | secondBits) == 0x8000000000000000ul;

    return anyNegativeZero ? BitConverter.Int64BitsToDouble(long.MinValue) : 0;
}
// Picks the NaN result for a two-operand single-precision operation.
// Priority: signalling NaN in op1, signalling NaN in op2, quiet NaN in op1,
// quiet NaN in op2. A signalling NaN is returned with bit 22 set (quieted).
// Returns false, with op_bits = 0, when neither operand is a NaN.
private static bool ProcessNaNs(uint op1_bits, uint op2_bits, out uint op_bits)
{
    const uint QuietBit = 1u << 22;

    if (IsSNaN(op1_bits))
    {
        op_bits = op1_bits | QuietBit;
        return true;
    }

    if (IsSNaN(op2_bits))
    {
        op_bits = op2_bits | QuietBit;
        return true;
    }

    if (IsQNaN(op1_bits))
    {
        op_bits = op1_bits;
        return true;
    }

    if (IsQNaN(op2_bits))
    {
        op_bits = op2_bits;
        return true;
    }

    op_bits = 0;
    return false;
}
// Picks the NaN result for a two-operand double-precision operation.
// Priority: signalling NaN in op1, signalling NaN in op2, quiet NaN in op1,
// quiet NaN in op2. A signalling NaN is returned with bit 51 set (quieted).
// Returns false, with op_bits = 0, when neither operand is a NaN.
private static bool ProcessNaNs(ulong op1_bits, ulong op2_bits, out ulong op_bits)
{
    const ulong QuietBit = 1ul << 51;

    if (IsSNaN(op1_bits))
    {
        op_bits = op1_bits | QuietBit;
        return true;
    }

    if (IsSNaN(op2_bits))
    {
        op_bits = op2_bits | QuietBit;
        return true;
    }

    if (IsQNaN(op1_bits))
    {
        op_bits = op1_bits;
        return true;
    }

    if (IsQNaN(op2_bits))
    {
        op_bits = op2_bits;
        return true;
    }

    op_bits = 0;
    return false;
}
// True when the single-precision bit pattern is a quiet NaN: all exponent bits
// and the top fraction bit (bit 22) are set, and the fraction is non-zero.
private static bool IsQNaN(uint op_bits)
{
    const uint FractionMask         = 0x007FFFFF;
    const uint ExponentAndQuietMask = 0x7FC00000;

    bool hasFraction = (op_bits & FractionMask) != 0;
    bool quietNaNTag = (op_bits & ExponentAndQuietMask) == ExponentAndQuietMask;

    return hasFraction && quietNaNTag;
}
// True when the double-precision bit pattern is a quiet NaN: all exponent bits
// and the top fraction bit (bit 51) are set, and the fraction is non-zero.
private static bool IsQNaN(ulong op_bits)
{
    const ulong FractionMask         = 0x000FFFFFFFFFFFFF;
    const ulong ExponentAndQuietMask = 0x7FF8000000000000;

    bool hasFraction = (op_bits & FractionMask) != 0;
    bool quietNaNTag = (op_bits & ExponentAndQuietMask) == ExponentAndQuietMask;

    return hasFraction && quietNaNTag;
}
// True when the single-precision bit pattern is a signalling NaN: all exponent
// bits set, the top fraction bit (bit 22) clear, and the fraction non-zero.
private static bool IsSNaN(uint op_bits)
{
    const uint FractionMask         = 0x007FFFFF;
    const uint ExponentAndQuietMask = 0x7FC00000;
    const uint ExponentOnly         = 0x7F800000;

    bool hasFraction      = (op_bits & FractionMask) != 0;
    bool signallingNaNTag = (op_bits & ExponentAndQuietMask) == ExponentOnly;

    return hasFraction && signallingNaNTag;
}
// True when the double-precision bit pattern is a signalling NaN: all exponent
// bits set, the top fraction bit (bit 51) clear, and the fraction non-zero.
private static bool IsSNaN(ulong op_bits)
{
    const ulong FractionMask         = 0x000FFFFFFFFFFFFF;
    const ulong ExponentAndQuietMask = 0x7FF8000000000000;
    const ulong ExponentOnly         = 0x7FF0000000000000;

    bool hasFraction      = (op_bits & FractionMask) != 0;
    bool signallingNaNTag = (op_bits & ExponentAndQuietMask) == ExponentOnly;

    return hasFraction && signallingNaNTag;
}
@ -93,86 +93,6 @@ namespace ChocolArm64.Instruction
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
public static double Max(double LHS, double RHS)
if (LHS == 0.0 && RHS == 0.0)
if (BitConverter.DoubleToInt64Bits(LHS) < 0 &&
BitConverter.DoubleToInt64Bits(RHS) < 0)
return -0.0;
return 0.0;
if (LHS > RHS)
return LHS;
if (double.IsNaN(LHS))
return LHS;
return RHS;
public static float MaxF(float LHS, float RHS)
if (LHS == 0.0 && RHS == 0.0)
if (BitConverter.SingleToInt32Bits(LHS) < 0 &&
BitConverter.SingleToInt32Bits(RHS) < 0)
return -0.0f;
return 0.0f;
if (LHS > RHS)
return LHS;
if (float.IsNaN(LHS))
return LHS;
return RHS;
public static double Min(double LHS, double RHS)
if (LHS == 0.0 && RHS == 0.0)
if (BitConverter.DoubleToInt64Bits(LHS) < 0 ||
BitConverter.DoubleToInt64Bits(RHS) < 0)
return -0.0;
return 0.0;
if (LHS < RHS)
return LHS;
if (double.IsNaN(LHS))
return LHS;
return RHS;
public static float MinF(float LHS, float RHS)
if (LHS == 0.0 && RHS == 0.0)
if (BitConverter.SingleToInt32Bits(LHS) < 0 ||
BitConverter.SingleToInt32Bits(RHS) < 0)
return -0.0f;
return 0.0f;
if (LHS < RHS)
return LHS;
if (float.IsNaN(LHS))
return LHS;
return RHS;
public static double Round(double Value, int Fpcr)
switch ((ARoundMode)((Fpcr >> 22) & 3))
@ -160,23 +160,23 @@ namespace ChocolArm64.Memory
return HostPageSize;
public bool[] IsRegionModified(long Position, long Size)
public (bool[], long) IsRegionModified(long Position, long Size)
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
return null;
return (null, 0);
long EndPos = Position + Size;
if ((ulong)EndPos < (ulong)Position)
return null;
return (null, 0);
if ((ulong)EndPos > AMemoryMgr.RamSize)
return null;
return (null, 0);
IntPtr MemAddress = new IntPtr(RamPtr + Position);
@ -201,7 +201,14 @@ namespace ChocolArm64.Memory
Modified[(VA - Position) / HostPageSize] = true;
return Modified;
return (Modified, Count);
public IntPtr GetHostAddress(long Position, long Size)
EnsureRangeIsValid(Position, Size, AMemoryPerm.Read);
return (IntPtr)(RamPtr + (ulong)Position);
public sbyte ReadSByte(long Position)
Normal file
Normal file
@ -0,0 +1,91 @@
namespace Ryujinx.Audio.Adpcm
{
    // Decodes ADPCM data organised in 8-byte frames: one header byte followed by
    // packed 4-bit samples, 14 samples per frame.
    public static class AdpcmDecoder
    {
        private const int SamplesPerFrame = 14;
        private const int BytesPerFrame   = 8;

        // Decodes Buffer into PCM values. Every decoded sample is written twice to the
        // output, so the returned array holds two entries per input sample.
        // The predictor history in Context is read at the start and written back at the end.
        public static int[] Decode(byte[] Buffer, AdpcmDecoderContext Context)
        {
            int remaining = GetSamplesCountFromSize(Buffer.Length);

            int[] output = new int[remaining * 2];

            short prev0 = Context.History0;
            short prev1 = Context.History1;

            int readPos  = 0;
            int writePos = 0;

            while (readPos < Buffer.Length)
            {
                // Header byte: low nibble selects the scale, bits 4..6 the coefficient pair.
                byte frameHeader = Buffer[readPos++];

                int scale     = 0x800 << (frameHeader & 0xf);
                int coeffBase = ((frameHeader >> 4) & 7) * 2;

                short c0 = Context.Coefficients[coeffBase + 0];
                short c1 = Context.Coefficients[coeffBase + 1];

                int count = remaining < SamplesPerFrame ? remaining : SamplesPerFrame;

                int packed = 0;

                for (int i = 0; i < count; i++)
                {
                    // Even samples fetch a new byte and use its high nibble;
                    // odd samples use the low nibble of the same byte.
                    bool highNibble = (i & 1) == 0;

                    if (highNibble)
                    {
                        packed = Buffer[readPos++];
                    }

                    // Shift pairs sign-extend the selected 4-bit value.
                    int nibble = highNibble
                        ? (packed << 24) >> 28
                        : (packed << 28) >> 28;

                    int predicted = c0 * prev0 + c1 * prev1;

                    int decoded = (nibble * scale + predicted + 0x400) >> 11;

                    short clamped = DspUtils.Saturate(decoded);

                    prev1 = prev0;
                    prev0 = clamped;

                    output[writePos++] = clamped;
                    output[writePos++] = clamped;
                }

                remaining -= count;
            }

            Context.History0 = prev0;
            Context.History1 = prev1;

            return output;
        }

        // Size in bytes of the whole frames needed for SamplesCount samples
        // (a partial trailing frame is not counted).
        public static long GetSizeFromSamplesCount(int SamplesCount)
        {
            return (SamplesCount / SamplesPerFrame) * BytesPerFrame;
        }

        // Number of samples contained in the whole frames of Size bytes
        // (trailing bytes that do not form a full frame are ignored).
        public static int GetSamplesCountFromSize(long Size)
        {
            int wholeFrames = (int)(Size / BytesPerFrame);

            return wholeFrames * SamplesPerFrame;
        }
    }
}
Normal file
Normal file
@ -0,0 +1,10 @@
namespace Ryujinx.Audio.Adpcm
{
    // Mutable decoder state: a coefficient table plus the two most recent
    // history samples.
    public class AdpcmDecoderContext
    {
        // Prediction coefficient table.
        public short[] Coefficients;

        // Most recent history sample.
        public short History0;

        // History sample preceding History0.
        public short History1;
    }
}
Normal file
Normal file
@ -0,0 +1,16 @@
namespace Ryujinx.Audio.Adpcm
{
    // Small DSP helpers.
    public static class DspUtils
    {
        // Clamps a 32-bit value into the 16-bit signed range and returns it as a short.
        public static short Saturate(int Value)
        {
            int clamped = Value > short.MaxValue
                ? short.MaxValue
                : (Value < short.MinValue ? short.MinValue : Value);

            return (short)clamped;
        }
    }
}
@ -2,11 +2,7 @@ namespace Ryujinx.Audio
public interface IAalOutput
int OpenTrack(
int SampleRate,
int Channels,
ReleaseCallback Callback,
out AudioFormat Format);
int OpenTrack(int SampleRate, int Channels, ReleaseCallback Callback);
void CloseTrack(int Track);
@ -14,7 +10,7 @@ namespace Ryujinx.Audio
long[] GetReleasedBuffers(int Track, int MaxCount);
void AppendBuffer(int Track, long Tag, byte[] Buffer);
void AppendBuffer<T>(int Track, long Tag, T[] Buffer) where T : struct;
void Start(int Track);
void Stop(int Track);
@ -3,6 +3,7 @@ using OpenTK.Audio.OpenAL;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;
namespace Ryujinx.Audio.OpenAL
@ -221,20 +222,15 @@ namespace Ryujinx.Audio.OpenAL
//If it's not slept it will waste cycles
while (KeepPolling);
public int OpenTrack(
int SampleRate,
int Channels,
ReleaseCallback Callback,
out AudioFormat Format)
public int OpenTrack(int SampleRate, int Channels, ReleaseCallback Callback)
Format = AudioFormat.PcmInt16;
Track Td = new Track(SampleRate, GetALFormat(Channels, Format), Callback);
Track Td = new Track(SampleRate, GetALFormat(Channels), Callback);
for (int Id = 0; Id < MaxTracks; Id++)
@ -247,38 +243,16 @@ namespace Ryujinx.Audio.OpenAL
return -1;
private ALFormat GetALFormat(int Channels, AudioFormat Format)
private ALFormat GetALFormat(int Channels)
if (Channels == 1)
switch (Channels)
switch (Format)
case AudioFormat.PcmInt8: return ALFormat.Mono8;
case AudioFormat.PcmInt16: return ALFormat.Mono16;
else if (Channels == 2)
switch (Format)
case AudioFormat.PcmInt8: return ALFormat.Stereo8;
case AudioFormat.PcmInt16: return ALFormat.Stereo16;
else if (Channels == 6)
switch (Format)
case AudioFormat.PcmInt8: return ALFormat.Multi51Chn8Ext;
case AudioFormat.PcmInt16: return ALFormat.Multi51Chn16Ext;
throw new ArgumentOutOfRangeException(nameof(Channels));
case 1: return ALFormat.Mono16;
case 2: return ALFormat.Stereo16;
case 6: return ALFormat.Multi51Chn16Ext;
throw new ArgumentException(nameof(Format));
throw new ArgumentOutOfRangeException(nameof(Channels));
public void CloseTrack(int Track)
@ -309,13 +283,15 @@ namespace Ryujinx.Audio.OpenAL
return null;
public void AppendBuffer(int Track, long Tag, byte[] Buffer)
public void AppendBuffer<T>(int Track, long Tag, T[] Buffer) where T : struct
if (Tracks.TryGetValue(Track, out Track Td))
int BufferId = Td.AppendBuffer(Tag);
AL.BufferData(BufferId, Td.Format, Buffer, Buffer.Length, Td.SampleRate);
int Size = Buffer.Length * Marshal.SizeOf<T>();
AL.BufferData<T>(BufferId, Td.Format, Buffer, Size, Td.SampleRate);
AL.SourceQueueBuffer(Td.SourceId, BufferId);
@ -366,7 +342,5 @@ namespace Ryujinx.Audio.OpenAL
return PlaybackState.Stopped;
@ -5,19 +5,24 @@ namespace Ryujinx.Graphics.Gal
R32G32B32A32 = 0x1,
R16G16B16A16 = 0x3,
A8B8G8R8 = 0x8,
A2B10G10R10 = 0x9,
R16_G16 = 0xc,
R32 = 0xf,
BC6H_SF16 = 0x10,
BC6H_UF16 = 0x11,
A1B5G5R5 = 0x14,
B5G6R5 = 0x15,
BC7U = 0x17,
G8R8 = 0x18,
R16 = 0x1b,
R8 = 0x1d,
BF10GF11RF11 = 0x21,
BC1 = 0x24,
BC2 = 0x25,
BC3 = 0x26,
BC4 = 0x27,
BC5 = 0x28,
Z24S8 = 0x29,
ZF32 = 0x2f,
Astc2D4x4 = 0x40,
Astc2D5x5 = 0x41,
@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.Gal
void Set(byte[] Data, int Width, int Height);
void SetTransform(float SX, float SY, float Rotate, float TX, float TY);
void SetTransform(bool FlipX, bool FlipY, int Top, int Left, int Right, int Bottom);
void SetWindowSize(int Width, int Height);
@ -22,6 +22,25 @@ namespace Ryujinx.Graphics.Gal
void Render();
void Copy(
long SrcKey,
long DstKey,
int SrcX0,
int SrcY0,
int SrcX1,
int SrcY1,
int DstX0,
int DstY0,
int DstX1,
int DstY1);
void GetBufferData(long Key, Action<byte[]> Callback);
void SetBufferData(
long Key,
int Width,
int Height,
GalTextureFormat Format,
byte[] Buffer);
@ -1,3 +1,5 @@
using System;
namespace Ryujinx.Graphics.Gal
public interface IGalRasterizer
@ -45,9 +47,9 @@ namespace Ryujinx.Graphics.Gal
void SetPrimitiveRestartIndex(uint Index);
void CreateVbo(long Key, byte[] Buffer);
void CreateVbo(long Key, int DataSize, IntPtr HostAddress);
void CreateIbo(long Key, byte[] Buffer);
void CreateIbo(long Key, int DataSize, IntPtr HostAddress);
void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs);
@ -1,3 +1,4 @@
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gal
@ -10,7 +11,7 @@ namespace Ryujinx.Graphics.Gal
IEnumerable<ShaderDeclInfo> GetTextureUsage(long Key);
void SetConstBuffer(long Key, int Cbuf, byte[] Data);
void SetConstBuffer(long Key, int Cbuf, int DataSize, IntPtr HostAddress);
void EnsureTextureBinding(string UniformName, int Value);
@ -18,6 +19,8 @@ namespace Ryujinx.Graphics.Gal
void Bind(long Key);
void Unbind(GalShaderType Type);
void BindProgram();
@ -1,13 +0,0 @@
#version 330 core
// Pass-through fragment shader: samples `tex` at the interpolated
// coordinate and writes the texel to the output unchanged.
precision highp float;
// Texture that is sampled for every fragment.
uniform sampler2D tex;
// Texture coordinate received from the previous stage.
in vec2 tex_coord;
// Final fragment color.
out vec4 out_frag_color;
void main(void) {
out_frag_color = texture(tex, tex_coord);
@ -1,28 +0,0 @@
#version 330 core
// Vertex shader that positions a 2D quad: applies a 2x2 transform and an
// offset to each vertex, then scales the result so a 1280x720 image keeps
// its aspect ratio inside the window.
precision highp float;
// 2x2 matrix applied to the incoming vertex position.
uniform mat2 transform;
// Current window size; used only to compute the aspect-ratio correction.
uniform vec2 window_size;
// Translation added after the transform.
uniform vec2 offset;
// Per-vertex inputs: position (location 0) and texture coordinate (location 1).
layout(location = 0) in vec2 in_position;
layout(location = 1) in vec2 in_tex_coord;
// Texture coordinate forwarded unchanged to the next stage.
out vec2 tex_coord;
// Have a fixed aspect ratio, fit the image within the available space.
vec2 get_scale_ratio(void) {
// Reference resolution whose aspect ratio is preserved.
vec2 native_size = vec2(1280, 720);
// x: native aspect / window aspect, y: window aspect / native aspect.
vec2 ratio = vec2(
(window_size.y * native_size.x) / (native_size.y * window_size.x),
(window_size.x * native_size.y) / (native_size.x * window_size.y)
// Clamp to 1 so the image is only ever shrunk along an axis, never stretched.
return min(ratio, 1);
void main(void) {
tex_coord = in_tex_coord;
vec2 t_pos = (transform * in_position) + offset;
gl_Position = vec4(t_pos * get_scale_ratio(), 0, 1);
@ -132,6 +132,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
case GalTextureFormat.R32G32B32A32: return (PixelFormat.Rgba, PixelType.Float);
case GalTextureFormat.R16G16B16A16: return (PixelFormat.Rgba, PixelType.HalfFloat);
case GalTextureFormat.A8B8G8R8: return (PixelFormat.Rgba, PixelType.UnsignedByte);
case GalTextureFormat.A2B10G10R10: return (PixelFormat.Rgba, PixelType.UnsignedInt2101010Reversed);
case GalTextureFormat.R16_G16: return (PixelFormat.RgInteger, PixelType.UnsignedShort);
case GalTextureFormat.R32: return (PixelFormat.Red, PixelType.Float);
case GalTextureFormat.A1B5G5R5: return (PixelFormat.Rgba, PixelType.UnsignedShort5551);
@ -140,6 +141,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL
case GalTextureFormat.R16: return (PixelFormat.Red, PixelType.HalfFloat);
case GalTextureFormat.R8: return (PixelFormat.Red, PixelType.UnsignedByte);
case GalTextureFormat.ZF32: return (PixelFormat.DepthComponent, PixelType.Float);
case GalTextureFormat.BF10GF11RF11: return (PixelFormat.Rgb, PixelType.UnsignedInt10F11F11FRev);
case GalTextureFormat.Z24S8: return (PixelFormat.DepthStencil, PixelType.UnsignedInt248);
throw new NotImplementedException(Format.ToString());
@ -148,6 +151,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL
switch (Format)
case GalTextureFormat.BC6H_UF16: return InternalFormat.CompressedRgbBptcUnsignedFloat;
case GalTextureFormat.BC6H_SF16: return InternalFormat.CompressedRgbBptcSignedFloat;
case GalTextureFormat.BC7U: return InternalFormat.CompressedRgbaBptcUnorm;
case GalTextureFormat.BC1: return InternalFormat.CompressedRgbaS3tcDxt1Ext;
case GalTextureFormat.BC2: return InternalFormat.CompressedRgbaS3tcDxt3Ext;
Normal file
Normal file
@ -0,0 +1,43 @@
using OpenTK.Graphics.OpenGL;
namespace Ryujinx.Graphics.Gal.OpenGL
{
    /// <summary>
    /// Queries the OpenGL driver for optional extensions and caches the answers,
    /// so the extension list is enumerated only once.
    /// </summary>
    static class OGLExtension
    {
        //Set once the extension flags below have been populated.
        private static bool Initialized = false;

        private static bool EnhancedLayouts;

        /// <summary>
        /// Tells whether the driver exposes <c>GL_ARB_enhanced_layouts</c>.
        /// </summary>
        /// <returns>True when the extension is listed by the driver.</returns>
        public static bool HasEnhancedLayouts()
        {
            //The first call performs the driver query, later calls use the cached flag.
            //NOTE(review): the query issues GL calls, so the first call presumably
            //needs a current OpenGL context on the calling thread - confirm with callers.
            EnsureInitialized();

            return EnhancedLayouts;
        }

        /// <summary>
        /// Populates the cached extension flags the first time it is called.
        /// </summary>
        private static void EnsureInitialized()
        {
            if (Initialized)
            {
                return;
            }

            EnhancedLayouts = HasExtension("GL_ARB_enhanced_layouts");

            //Mark as done only after the query succeeded; without this the
            //extension list would be enumerated again on every call.
            Initialized = true;
        }

        /// <summary>
        /// Scans the indexed extension strings reported by the driver.
        /// </summary>
        /// <param name="Name">Exact extension name to look for.</param>
        /// <returns>True when an extension string equals <paramref name="Name"/>.</returns>
        private static bool HasExtension(string Name)
        {
            int NumExtensions = GL.GetInteger(GetPName.NumExtensions);

            for (int Extension = 0; Extension < NumExtensions; Extension++)
            {
                if (GL.GetString(StringNameIndexed.Extensions, Extension) == Name)
                {
                    return true;
                }
            }

            return false;
        }
    }
}
@ -32,57 +32,49 @@ namespace Ryujinx.Graphics.Gal.OpenGL
public int RbHandle { get; private set; }
public int TexHandle { get; private set; }
public FrameBuffer(int Width, int Height)
public FrameBuffer(int Width, int Height, bool HasRenderBuffer)
this.Width = Width;
this.Height = Height;
Handle = GL.GenFramebuffer();
RbHandle = GL.GenRenderbuffer();
TexHandle = GL.GenTexture();
if (HasRenderBuffer)
RbHandle = GL.GenRenderbuffer();
private struct ShaderProgram
public int Handle;
public int VpHandle;
public int FpHandle;
private const int NativeWidth = 1280;
private const int NativeHeight = 720;
private Dictionary<long, FrameBuffer> Fbs;
private ShaderProgram Shader;
private Rect Viewport;
private Rect Window;
private bool IsInitialized;
private FrameBuffer CurrFb;
private FrameBuffer CurrReadFb;
private int RawFbTexWidth;
private int RawFbTexHeight;
private int RawFbTexHandle;
private FrameBuffer RawFb;
private int CurrFbHandle;
private int CurrTexHandle;
private bool FlipX;
private bool FlipY;
private int VaoHandle;
private int VboHandle;
private int CropTop;
private int CropLeft;
private int CropRight;
private int CropBottom;
public OGLFrameBuffer()
Fbs = new Dictionary<long, FrameBuffer>();
Shader = new ShaderProgram();
public void Create(long Key, int Width, int Height)
//TODO: We should either use the original frame buffer size,
//or just remove the Width/Height arguments.
Width = Window.Width;
Height = Window.Height;
if (Fbs.TryGetValue(Key, out FrameBuffer Fb))
if (Fb.Width != Width ||
@ -97,7 +89,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
Fb = new FrameBuffer(Width, Height);
Fb = new FrameBuffer(Width, Height, true);
SetupTexture(Fb.TexHandle, Width, Height);
@ -125,8 +117,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL
GL.Viewport(0, 0, Width, Height);
Fbs.Add(Key, Fb);
@ -136,7 +126,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
GL.BindFramebuffer(FramebufferTarget.Framebuffer, Fb.Handle);
CurrFbHandle = Fb.Handle;
CurrFb = Fb;
@ -154,75 +144,50 @@ namespace Ryujinx.Graphics.Gal.OpenGL
if (Fbs.TryGetValue(Key, out FrameBuffer Fb))
CurrTexHandle = Fb.TexHandle;
CurrReadFb = Fb;
public void Set(byte[] Data, int Width, int Height)
if (RawFbTexHandle == 0)
if (RawFb == null)
RawFbTexHandle = GL.GenTexture();
CreateRawFb(Width, Height);
if (RawFbTexWidth != Width ||
RawFbTexHeight != Height)
if (RawFb.Width != Width ||
RawFb.Height != Height)
SetupTexture(RawFbTexHandle, Width, Height);
SetupTexture(RawFb.TexHandle, Width, Height);
RawFbTexWidth = Width;
RawFbTexHeight = Height;
RawFb.Width = Width;
RawFb.Height = Height;
GL.BindTexture(TextureTarget.Texture2D, RawFbTexHandle);
GL.BindTexture(TextureTarget.Texture2D, RawFb.TexHandle);
(PixelFormat Format, PixelType Type) = OGLEnumConverter.GetTextureFormat(GalTextureFormat.A8B8G8R8);
GL.TexSubImage2D(TextureTarget.Texture2D, 0, 0, 0, Width, Height, Format, Type, Data);
CurrTexHandle = RawFbTexHandle;
CurrReadFb = RawFb;
public void SetTransform(float SX, float SY, float Rotate, float TX, float TY)
public void SetTransform(bool FlipX, bool FlipY, int Top, int Left, int Right, int Bottom)
this.FlipX = FlipX;
this.FlipY = FlipY;
Matrix2 Transform;
Transform = Matrix2.CreateScale(SX, SY);
Transform *= Matrix2.CreateRotation(Rotate);
Vector2 Offs = new Vector2(TX, TY);
int CurrentProgram = GL.GetInteger(GetPName.CurrentProgram);
int TransformUniformLocation = GL.GetUniformLocation(Shader.Handle, "transform");
GL.UniformMatrix2(TransformUniformLocation, false, ref Transform);
int OffsetUniformLocation = GL.GetUniformLocation(Shader.Handle, "offset");
GL.Uniform2(OffsetUniformLocation, ref Offs);
CropTop = Top;
CropLeft = Left;
CropRight = Right;
CropBottom = Bottom;
public void SetWindowSize(int Width, int Height)
int CurrentProgram = GL.GetInteger(GetPName.CurrentProgram);
int WindowSizeUniformLocation = GL.GetUniformLocation(Shader.Handle, "window_size");
GL.Uniform2(WindowSizeUniformLocation, new Vector2(Width, Height));
Window = new Rect(0, 0, Width, Height);
@ -230,77 +195,101 @@ namespace Ryujinx.Graphics.Gal.OpenGL
Viewport = new Rect(X, Y, Width, Height);
private void SetViewport(Rect Viewport)
public void Render()
if (CurrTexHandle != 0)
if (CurrReadFb != null)
int SrcX0, SrcX1, SrcY0, SrcY1;
//bool CullFaceEnable = GL.IsEnabled(EnableCap.CullFace);
if (CropLeft == 0 && CropRight == 0)
SrcX0 = 0;
SrcX1 = CurrReadFb.Width;
SrcX0 = CropLeft;
SrcX1 = CropRight;
bool DepthTestEnable = GL.IsEnabled(EnableCap.DepthTest);
if (CropTop == 0 && CropBottom == 0)
SrcY0 = 0;
SrcY1 = CurrReadFb.Height;
SrcY0 = CropTop;
SrcY1 = CropBottom;
bool StencilTestEnable = GL.IsEnabled(EnableCap.StencilTest);
float RatioX = MathF.Min(1f, (Window.Height * (float)NativeWidth) / ((float)NativeHeight * Window.Width));
float RatioY = MathF.Min(1f, (Window.Width * (float)NativeHeight) / ((float)NativeWidth * Window.Height));
bool AlphaBlendEnable = GL.IsEnabled(EnableCap.Blend);
int DstWidth = (int)(Window.Width * RatioX);
int DstHeight = (int)(Window.Height * RatioY);
int DstPaddingX = (Window.Width - DstWidth) / 2;
int DstPaddingY = (Window.Height - DstHeight) / 2;
int DstX0 = FlipX ? Window.Width - DstPaddingX : DstPaddingX;
int DstX1 = FlipX ? DstPaddingX : Window.Width - DstPaddingX;
GL.BindTexture(TextureTarget.Texture2D, CurrTexHandle);
int CurrentProgram = GL.GetInteger(GetPName.CurrentProgram);
int DstY0 = FlipY ? DstPaddingY : Window.Height - DstPaddingY;
int DstY1 = FlipY ? Window.Height - DstPaddingY : DstPaddingY;
GL.BindFramebuffer(FramebufferTarget.Framebuffer, 0);
GL.Viewport(0, 0, Window.Width, Window.Height);
ClearBufferMask.ColorBufferBit |
GL.BindFramebuffer(FramebufferTarget.ReadFramebuffer, CurrReadFb.Handle);
GL.Clear(ClearBufferMask.ColorBufferBit | ClearBufferMask.DepthBufferBit);
GL.DrawArrays(PrimitiveType.TriangleStrip, 0, 4);
//Restore the original state.
GL.BindFramebuffer(FramebufferTarget.Framebuffer, CurrFbHandle);
//if (CullFaceEnable)
// GL.Enable(EnableCap.CullFace);
if (DepthTestEnable)
SrcX0, SrcY0, SrcX1, SrcY1,
DstX0, DstY0, DstX1, DstY1,
ClearBufferMask.ColorBufferBit, BlitFramebufferFilter.Linear);
if (StencilTestEnable)
public void Copy(
long SrcKey,
long DstKey,
int SrcX0,
int SrcY0,
int SrcX1,
int SrcY1,
int DstX0,
int DstY0,
int DstX1,
int DstY1)
if (AlphaBlendEnable)
if (Fbs.TryGetValue(SrcKey, out FrameBuffer SrcFb) &&
Fbs.TryGetValue(DstKey, out FrameBuffer DstFb))
GL.BindFramebuffer(FramebufferTarget.ReadFramebuffer, SrcFb.Handle);
GL.BindFramebuffer(FramebufferTarget.DrawFramebuffer, DstFb.Handle);
//GL.Viewport(0, 0, 1280, 720);
SrcX0, SrcY0, SrcX1, SrcY1,
DstX0, DstY0, DstX1, DstY1,
@ -324,98 +313,61 @@ namespace Ryujinx.Graphics.Gal.OpenGL
GL.BindFramebuffer(FramebufferTarget.ReadFramebuffer, CurrFbHandle);
private void SetViewport(Rect Viewport)
public void SetBufferData(
long Key,
int Width,
int Height,
GalTextureFormat Format,
byte[] Buffer)
if (Fbs.TryGetValue(Key, out FrameBuffer Fb))
GL.BindTexture(TextureTarget.Texture2D, Fb.TexHandle);
private void EnsureInitialized()
if (!IsInitialized)
IsInitialized = true;
const int Level = 0;
const int Border = 0;
const PixelInternalFormat InternalFmt = PixelInternalFormat.Rgba;
(PixelFormat GlFormat, PixelType Type) = OGLEnumConverter.GetTextureFormat(Format);
private void SetupShader()
private void CreateRawFb(int Width, int Height)
Shader.VpHandle = GL.CreateShader(ShaderType.VertexShader);
Shader.FpHandle = GL.CreateShader(ShaderType.FragmentShader);
if (RawFb == null)
RawFb = new FrameBuffer(Width, Height, false);
string VpSource = EmbeddedResource.GetString("GlFbVtxShader");
string FpSource = EmbeddedResource.GetString("GlFbFragShader");
SetupTexture(RawFb.TexHandle, Width, Height);
GL.ShaderSource(Shader.VpHandle, VpSource);
GL.ShaderSource(Shader.FpHandle, FpSource);
RawFb.Width = Width;
RawFb.Height = Height;
Shader.Handle = GL.CreateProgram();
GL.BindFramebuffer(FramebufferTarget.Framebuffer, RawFb.Handle);
GL.AttachShader(Shader.Handle, Shader.VpHandle);
GL.AttachShader(Shader.Handle, Shader.FpHandle);
Matrix2 Transform = Matrix2.Identity;
int TexUniformLocation = GL.GetUniformLocation(Shader.Handle, "tex");
GL.Uniform1(TexUniformLocation, 0);
int WindowSizeUniformLocation = GL.GetUniformLocation(Shader.Handle, "window_size");
GL.Uniform2(WindowSizeUniformLocation, new Vector2(1280.0f, 720.0f));
int TransformUniformLocation = GL.GetUniformLocation(Shader.Handle, "transform");
GL.UniformMatrix2(TransformUniformLocation, false, ref Transform);
GL.Viewport(0, 0, Width, Height);
private void SetupVertex()
VaoHandle = GL.GenVertexArray();
VboHandle = GL.GenBuffer();
float[] Buffer = new float[]
-1, 1, 0, 0,
1, 1, 1, 0,
-1, -1, 0, 1,
1, -1, 1, 1
IntPtr Length = new IntPtr(Buffer.Length * 4);
GL.BindBuffer(BufferTarget.ArrayBuffer, VboHandle);
GL.BufferData(BufferTarget.ArrayBuffer, Length, Buffer, BufferUsageHint.StreamDraw);
GL.BindBuffer(BufferTarget.ArrayBuffer, 0);
GL.BindBuffer(BufferTarget.ArrayBuffer, VboHandle);
GL.VertexAttribPointer(0, 2, VertexAttribPointerType.Float, false, 16, 0);
GL.BindBuffer(BufferTarget.ArrayBuffer, VboHandle);
GL.VertexAttribPointer(1, 2, VertexAttribPointerType.Float, false, 16, 8);
private void SetupTexture(int Handle, int Width, int Height)
@ -211,28 +211,28 @@ namespace Ryujinx.Graphics.Gal.OpenGL
public void CreateVbo(long Key, byte[] Buffer)
public void CreateVbo(long Key, int DataSize, IntPtr HostAddress)
int Handle = GL.GenBuffer();
VboCache.AddOrUpdate(Key, Handle, (uint)Buffer.Length);
VboCache.AddOrUpdate(Key, Handle, (uint)DataSize);
IntPtr Length = new IntPtr(Buffer.Length);
IntPtr Length = new IntPtr(DataSize);
GL.BindBuffer(BufferTarget.ArrayBuffer, Handle);
GL.BufferData(BufferTarget.ArrayBuffer, Length, Buffer, BufferUsageHint.StreamDraw);
GL.BufferData(BufferTarget.ArrayBuffer, Length, HostAddress, BufferUsageHint.StreamDraw);
public void CreateIbo(long Key, byte[] Buffer)
public void CreateIbo(long Key, int DataSize, IntPtr HostAddress)
int Handle = GL.GenBuffer();
IboCache.AddOrUpdate(Key, Handle, (uint)Buffer.Length);
IboCache.AddOrUpdate(Key, Handle, (uint)DataSize);
IntPtr Length = new IntPtr(Buffer.Length);
IntPtr Length = new IntPtr(DataSize);
GL.BindBuffer(BufferTarget.ElementArrayBuffer, Handle);
GL.BufferData(BufferTarget.ElementArrayBuffer, Length, Buffer, BufferUsageHint.StreamDraw);
GL.BufferData(BufferTarget.ElementArrayBuffer, Length, HostAddress, BufferUsageHint.StreamDraw);
public void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs)
@ -278,9 +278,21 @@ namespace Ryujinx.Graphics.Gal.OpenGL
int Size = AttribElements[Attrib.Size];
int Offset = Attrib.Offset;
if (Attrib.Type == GalVertexAttribType.Sint ||
Attrib.Type == GalVertexAttribType.Uint)
IntPtr Pointer = new IntPtr(Offset);
VertexAttribIntegerType IType = (VertexAttribIntegerType)Type;
GL.VertexAttribIPointer(Attrib.Index, Size, IType, Stride, Pointer);
GL.VertexAttribPointer(Attrib.Index, Size, Type, Normalize, Stride, Offset);
public void SetIndexArray(int Size, GalIndexFormat Format)
@ -5,6 +5,8 @@ using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using Buffer = System.Buffer;
namespace Ryujinx.Graphics.Gal.OpenGL
public class OGLShader : IGalShader
@ -118,15 +120,20 @@ namespace Ryujinx.Graphics.Gal.OpenGL
if (IsDualVp)
ShaderDumper.Dump(Memory, Position, Type, "a");
ShaderDumper.Dump(Memory, PositionB, Type, "b");
Program = Decompiler.Decompile(
Position + 0x50,
PositionB + 0x50,
Program = Decompiler.Decompile(Memory, Position + 0x50, Type);
ShaderDumper.Dump(Memory, Position, Type);
Program = Decompiler.Decompile(Memory, Position, Type);
return new ShaderStage(
@ -146,7 +153,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
return Enumerable.Empty<ShaderDeclInfo>();
public void SetConstBuffer(long Key, int Cbuf, byte[] Data)
public void SetConstBuffer(long Key, int Cbuf, int DataSize, IntPtr HostAddress)
if (Stages.TryGetValue(Key, out ShaderStage Stage))
@ -154,13 +161,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL
OGLStreamBuffer Buffer = GetConstBuffer(Stage.Type, Cbuf);
int Size = Math.Min(Data.Length, Buffer.Size);
int Size = Math.Min(DataSize, Buffer.Size);
byte[] Destiny = Buffer.Map(Size);
Array.Copy(Data, Destiny, Size);
Buffer.SetData(Size, HostAddress);
@ -193,6 +196,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL
private void Bind(ShaderStage Stage)
if (Stage.Type == GalShaderType.Geometry)
//Enhanced layouts are required for Geometry shaders
//skip this stage if current driver has no ARB_enhanced_layouts
if (!OGLExtension.HasEnhancedLayouts())
switch (Stage.Type)
case GalShaderType.Vertex: Current.Vertex = Stage; break;
@ -203,6 +216,18 @@ namespace Ryujinx.Graphics.Gal.OpenGL
/// <summary>
/// Clears the currently selected shader stage of the given type.
/// </summary>
/// <param name="Type">Stage slot to clear; unrecognized values are ignored.</param>
public void Unbind(GalShaderType Type)
{
    if (Type == GalShaderType.Vertex)
    {
        Current.Vertex = null;
    }
    else if (Type == GalShaderType.TessControl)
    {
        Current.TessControl = null;
    }
    else if (Type == GalShaderType.TessEvaluation)
    {
        Current.TessEvaluation = null;
    }
    else if (Type == GalShaderType.Geometry)
    {
        Current.Geometry = null;
    }
    else if (Type == GalShaderType.Fragment)
    {
        Current.Fragment = null;
    }
}
public void BindProgram()
if (Current.Vertex == null ||
@ -232,7 +257,10 @@ namespace Ryujinx.Graphics.Gal.OpenGL
if (CurrentProgramHandle != Handle)
CurrentProgramHandle = Handle;
@ -251,7 +279,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
int FreeBinding = 0;
int BindUniformBlocksIfNotNull(ShaderStage Stage)
void BindUniformBlocksIfNotNull(ShaderStage Stage)
if (Stage != null)
@ -270,8 +298,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL
return FreeBinding;
@ -285,7 +311,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
int FreeBinding = 0;
int BindUniformBuffersIfNotNull(ShaderStage Stage)
void BindUniformBuffersIfNotNull(ShaderStage Stage)
if (Stage != null)
@ -298,8 +324,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL
return FreeBinding;
@ -320,7 +344,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL
//Allocate a maximum of 64 KiB
int Size = Math.Min(GL.GetInteger(GetPName.MaxUniformBlockSize), 64 * 1024);
Buffer = OGLStreamBuffer.Create(BufferTarget.UniformBuffer, Size);
Buffer = new OGLStreamBuffer(BufferTarget.UniformBuffer, Size);
ConstBuffers[StageIndex][Cbuf] = Buffer;
@ -1,9 +1,9 @@
using System;
using OpenTK.Graphics.OpenGL;
using System;
namespace Ryujinx.Graphics.Gal.OpenGL
abstract class OGLStreamBuffer : IDisposable
class OGLStreamBuffer : IDisposable
public int Handle { get; protected set; }
@ -11,53 +11,25 @@ namespace Ryujinx.Graphics.Gal.OpenGL
protected BufferTarget Target { get; private set; }
private bool Mapped = false;
public OGLStreamBuffer(BufferTarget Target, int MaxSize)
public OGLStreamBuffer(BufferTarget Target, int Size)
Handle = 0;
Mapped = false;
this.Target = Target;
this.Size = MaxSize;
this.Size = Size;
Handle = GL.GenBuffer();
GL.BindBuffer(Target, Handle);
GL.BufferData(Target, Size, IntPtr.Zero, BufferUsageHint.StreamDraw);
public static OGLStreamBuffer Create(BufferTarget Target, int MaxSize)
public void SetData(int Size, IntPtr HostAddress)
//TODO: Query here for ARB_buffer_storage and use when available
return new SubDataBuffer(Target, MaxSize);
GL.BindBuffer(Target, Handle);
GL.BufferSubData(Target, IntPtr.Zero, Size, HostAddress);
public byte[] Map(int Size)
if (Handle == 0 || Mapped || Size > this.Size)
throw new InvalidOperationException();
byte[] Memory = InternMap(Size);
Mapped = true;
return Memory;
public void Unmap(int UsedSize)
if (Handle == 0 || !Mapped)
throw new InvalidOperationException();
Mapped = false;
protected abstract byte[] InternMap(int Size);
protected abstract void InternUnmap(int UsedSize);
public void Dispose()
@ -73,41 +45,4 @@ namespace Ryujinx.Graphics.Gal.OpenGL
class SubDataBuffer : OGLStreamBuffer
private byte[] Memory;
public SubDataBuffer(BufferTarget Target, int MaxSize)
: base(Target, MaxSize)
Memory = new byte[MaxSize];
GL.GenBuffers(1, out int Handle);
GL.BindBuffer(Target, Handle);
GL.BufferData(Target, Size, IntPtr.Zero, BufferUsageHint.StreamDraw);
this.Handle = Handle;
protected override byte[] InternMap(int Size)
return Memory;
protected override void InternUnmap(int UsedSize)
GL.BindBuffer(Target, Handle);
fixed (byte* MemoryPtr = Memory)
GL.BufferSubData(Target, IntPtr.Zero, UsedSize, (IntPtr)MemoryPtr);
@ -212,6 +212,8 @@ namespace Ryujinx.Graphics.Gal.OpenGL
switch (Format)
case GalTextureFormat.BC6H_UF16:
case GalTextureFormat.BC6H_SF16:
case GalTextureFormat.BC7U:
case GalTextureFormat.BC1:
case GalTextureFormat.BC2:
@ -4,13 +4,13 @@ namespace Ryujinx.Graphics.Gal.Shader
class GlslDecl
public const int LayerAttr = 0x064;
public const int TessCoordAttrX = 0x2f0;
public const int TessCoordAttrY = 0x2f4;
public const int TessCoordAttrZ = 0x2f8;
public const int InstanceIdAttr = 0x2f8;
public const int VertexIdAttr = 0x2fc;
public const int FaceAttr = 0x3fc;
public const int GlPositionWAttr = 0x7c;
public const int MaxUboSize = 1024;
@ -210,7 +210,8 @@ namespace Ryujinx.Graphics.Gal.Shader
//This is a built-in input variable.
if (Abuf.Offs == VertexIdAttr ||
Abuf.Offs == InstanceIdAttr ||
Abuf.Offs == FaceAttr)
Abuf.Offs == FaceAttr ||
Abuf.Offs == LayerAttr)
@ -254,6 +255,8 @@ namespace Ryujinx.Graphics.Gal.Shader
m_Attributes.Add(Index, DeclInfo);
Traverse(Abuf, Abuf.Vertex);
@ -21,10 +21,14 @@ namespace Ryujinx.Graphics.Gal.Shader
private const string IdentationStr = " ";
private const int MaxVertexInput = 3;
private static string[] ElemTypes = new string[] { "float", "vec2", "vec3", "vec4" };
private GlslDecl Decl;
private ShaderHeader Header, HeaderB;
private ShaderIrBlock[] Blocks, BlocksB;
private StringBuilder SB;
@ -50,6 +54,7 @@ namespace Ryujinx.Graphics.Gal.Shader
{ ShaderIrInst.Cle, GetCleExpr },
{ ShaderIrInst.Clt, GetCltExpr },
{ ShaderIrInst.Cne, GetCneExpr },
{ ShaderIrInst.Cut, GetCutExpr },
{ ShaderIrInst.Exit, GetExitExpr },
{ ShaderIrInst.Fabs, GetAbsExpr },
{ ShaderIrInst.Fadd, GetAddExpr },
@ -110,6 +115,9 @@ namespace Ryujinx.Graphics.Gal.Shader
long VpBPosition,
GalShaderType ShaderType)
Header = new ShaderHeader(Memory, VpAPosition);
HeaderB = new ShaderHeader(Memory, VpBPosition);
Blocks = ShaderDecoder.Decode(Memory, VpAPosition);
BlocksB = ShaderDecoder.Decode(Memory, VpBPosition);
@ -123,6 +131,9 @@ namespace Ryujinx.Graphics.Gal.Shader
public GlslProgram Decompile(IGalMemory Memory, long Position, GalShaderType ShaderType)
Header = new ShaderHeader(Memory, Position);
HeaderB = null;
Blocks = ShaderDecoder.Decode(Memory, Position);
BlocksB = null;
@ -137,6 +148,7 @@ namespace Ryujinx.Graphics.Gal.Shader
SB.AppendLine("#version 410 core");
@ -170,6 +182,37 @@ namespace Ryujinx.Graphics.Gal.Shader
//Writes the layout declarations that must precede the other declarations
//of the generated GLSL. Only geometry shaders emit anything in the lines shown here.
private void PrintDeclHeader()
if (Decl.ShaderType == GalShaderType.Geometry)
//Upper bound of emitted vertices, taken from the shader header.
int MaxVertices = Header.MaxOutputVertexCount;
string OutputTopology;
//Translate the header's output topology into the GLSL layout qualifier name.
switch (Header.OutputTopology)
case ShaderHeader.PointList: OutputTopology = "points"; break;
case ShaderHeader.LineStrip: OutputTopology = "line_strip"; break;
case ShaderHeader.TriangleStrip: OutputTopology = "triangle_strip"; break;
//Any other topology value is rejected.
default: throw new InvalidOperationException();
SB.AppendLine("#extension GL_ARB_enhanced_layouts : require");
//The input primitive is always declared as triangles (see the emitted comment below).
SB.AppendLine("// Stubbed. Maxwell geometry shaders don't inform input geometry type");
SB.AppendLine("layout(triangles) in;" + Environment.NewLine);
SB.AppendLine($"layout({OutputTopology}, max_vertices = {MaxVertices}) out;");
private void PrintDeclTextures()
PrintDecls(Decl.Textures, "uniform sampler2D");
@ -201,7 +244,9 @@ namespace Ryujinx.Graphics.Gal.Shader
private void PrintDeclAttributes()
string GeometryArray = (Decl.ShaderType == GalShaderType.Geometry) ? "[" + MaxVertexInput + "]" : "";
PrintDecls(Decl.Attributes, Suffix: GeometryArray);
private void PrintDeclInAttributes()
@ -211,12 +256,32 @@ namespace Ryujinx.Graphics.Gal.Shader
SB.AppendLine("layout (location = " + GlslDecl.PositionOutAttrLocation + ") in vec4 " + GlslDecl.PositionOutAttrName + ";");
if (Decl.ShaderType == GalShaderType.Geometry)
if (Decl.InAttributes.Count > 0)
SB.AppendLine("in Vertex {");
foreach (ShaderDeclInfo DeclInfo in Decl.InAttributes.Values.OrderBy(DeclKeySelector))
if (DeclInfo.Index >= 0)
SB.AppendLine(IdentationStr + "layout (location = " + DeclInfo.Index + ") " + GetDecl(DeclInfo) + "; ");
SB.AppendLine("} block_in[];" + Environment.NewLine);
PrintDeclAttributes(Decl.InAttributes.Values, "in");
private void PrintDeclOutAttributes()
if (Decl.ShaderType == GalShaderType.Vertex)
if (Decl.ShaderType != GalShaderType.Fragment)
SB.AppendLine("layout (location = " + GlslDecl.PositionOutAttrLocation + ") out vec4 " + GlslDecl.PositionOutAttrName + ";");
@ -254,7 +319,7 @@ namespace Ryujinx.Graphics.Gal.Shader
PrintDecls(Decl.Preds, "bool");
private void PrintDecls(IReadOnlyDictionary<int, ShaderDeclInfo> Dict, string CustomType = null)
private void PrintDecls(IReadOnlyDictionary<int, ShaderDeclInfo> Dict, string CustomType = null, string Suffix = "")
foreach (ShaderDeclInfo DeclInfo in Dict.Values.OrderBy(DeclKeySelector))
@ -262,15 +327,15 @@ namespace Ryujinx.Graphics.Gal.Shader
if (CustomType != null)
Name = CustomType + " " + DeclInfo.Name + ";";
Name = CustomType + " " + DeclInfo.Name + Suffix + ";";
else if (DeclInfo.Name == GlslDecl.FragmentOutputName)
Name = "layout (location = 0) out " + GetDecl(DeclInfo) + ";" + Environment.NewLine;
Name = "layout (location = 0) out " + GetDecl(DeclInfo) + Suffix + ";" + Environment.NewLine;
Name = GetDecl(DeclInfo) + ";";
Name = GetDecl(DeclInfo) + Suffix + ";";
@ -307,8 +372,22 @@ namespace Ryujinx.Graphics.Gal.Shader
string Swizzle = ".xyzw".Substring(0, DeclInfo.Size + 1);
if (Decl.ShaderType == GalShaderType.Geometry)
for (int Vertex = 0; Vertex < MaxVertexInput; Vertex++)
string Dst = Attr.Name + "[" + Vertex + "]" + Swizzle;
string Src = "block_in[" + Vertex + "]." + DeclInfo.Name;
SB.AppendLine(IdentationStr + Dst + " = " + Src + ";");
SB.AppendLine(IdentationStr + Attr.Name + Swizzle + " = " + DeclInfo.Name + ";");
if (BlocksB != null)
@ -320,6 +399,16 @@ namespace Ryujinx.Graphics.Gal.Shader
SB.AppendLine(IdentationStr + GlslDecl.ProgramName + "();");
if (Decl.ShaderType != GalShaderType.Geometry)
private void PrintAttrToOutput(string Identation = IdentationStr)
foreach (KeyValuePair<int, ShaderDeclInfo> KV in Decl.OutAttributes)
if (!Decl.Attributes.TryGetValue(KV.Key, out ShaderDeclInfo Attr))
@ -331,18 +420,26 @@ namespace Ryujinx.Graphics.Gal.Shader
string Swizzle = ".xyzw".Substring(0, DeclInfo.Size + 1);
SB.AppendLine(IdentationStr + DeclInfo.Name + " = " + Attr.Name + Swizzle + ";");
string Name = Attr.Name;
if (Decl.ShaderType == GalShaderType.Geometry)
Name += "[0]";
SB.AppendLine(Identation + DeclInfo.Name + " = " + Name + Swizzle + ";");
if (Decl.ShaderType == GalShaderType.Vertex)
SB.AppendLine(IdentationStr + "gl_Position.xy *= " + GlslDecl.FlipUniformName + ";");
SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + " = gl_Position;");
SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + ".w = 1;");
SB.AppendLine(Identation + "gl_Position.xy *= " + GlslDecl.FlipUniformName + ";");
if (Decl.ShaderType != GalShaderType.Fragment)
SB.AppendLine(Identation + GlslDecl.PositionOutAttrName + " = gl_Position;");
SB.AppendLine(Identation + GlslDecl.PositionOutAttrName + ".w = 1;");
private void PrintBlockScope(
@ -481,12 +578,18 @@ namespace Ryujinx.Graphics.Gal.Shader
SB.AppendLine(Identation + "continue;");
else if (Op.Inst == ShaderIrInst.Emit)
SB.AppendLine(Identation + "EmitVertex();");
SB.AppendLine(Identation + GetSrcExpr(Op, true) + ";");
else if (Node is ShaderIrCmnt Cmnt)
SB.AppendLine(Identation + "// " + Cmnt.Comment);
@ -598,9 +701,6 @@ namespace Ryujinx.Graphics.Gal.Shader
switch (Op.Inst)
case ShaderIrInst.Frcp:
return true;
case ShaderIrInst.Ipa:
case ShaderIrInst.Texq:
case ShaderIrInst.Texs:
@ -608,8 +708,7 @@ namespace Ryujinx.Graphics.Gal.Shader
return false;
return Op.OperandB != null ||
Op.OperandC != null;
return true;
private string GetName(ShaderIrOperCbuf Cbuf)
@ -635,6 +734,14 @@ namespace Ryujinx.Graphics.Gal.Shader
private string GetOutAbufName(ShaderIrOperAbuf Abuf)
if (Decl.ShaderType == GalShaderType.Geometry)
switch (Abuf.Offs)
case GlslDecl.LayerAttr: return "gl_Layer";
return GetAttrTempName(Abuf);
@ -693,8 +800,17 @@ namespace Ryujinx.Graphics.Gal.Shader
throw new InvalidOperationException();
if (Decl.ShaderType == GalShaderType.Geometry)
string Vertex = "floatBitsToInt(" + GetSrcExpr(Abuf.Vertex) + ")";
return DeclInfo.Name + "[" + Vertex + "]" + Swizzle;
return DeclInfo.Name + Swizzle;
private string GetName(ShaderIrOperGpr Gpr)
@ -711,13 +827,13 @@ namespace Ryujinx.Graphics.Gal.Shader
return Imm.Value.ToString(CultureInfo.InvariantCulture);
return GetIntConst(Imm.Value);
private string GetValue(ShaderIrOperImmf Immf)
return Immf.Value.ToString(CultureInfo.InvariantCulture);
return GetFloatConst(Immf.Value);
private string GetName(ShaderIrOperPred Pred)
@ -806,6 +922,8 @@ namespace Ryujinx.Graphics.Gal.Shader
//One-line expression builders, each returning the GLSL text for a single IR operation.
//Cne forwards to GetBinaryExpr with the "!=" operator.
private string GetCneExpr(ShaderIrOp Op) => GetBinaryExpr(Op, "!=");
//Cut ignores its operands and always yields the literal call "EndPrimitive()".
private string GetCutExpr(ShaderIrOp Op) => "EndPrimitive()";
//Cneu uses the same "!=" operator but goes through GetBinaryExprWithNaN.
private string GetCneuExpr(ShaderIrOp Op) => GetBinaryExprWithNaN(Op, "!=");
//Cnum forwards to GetUnaryCall with "!isnan" as the call name.
private string GetCnumExpr(ShaderIrOp Op) => GetUnaryCall(Op, "!isnan");
@ -1047,7 +1165,7 @@ namespace Ryujinx.Graphics.Gal.Shader
if (!float.IsNaN(Value) && !float.IsInfinity(Value))
return Value.ToString(CultureInfo.InvariantCulture);
return GetFloatConst(Value);
@ -1064,6 +1182,20 @@ namespace Ryujinx.Graphics.Gal.Shader
return Expr;
//Formats an integer literal using the invariant culture.
//Negative values are wrapped in parentheses, non-negative values are returned as is.
private static string GetIntConst(int Value)
{
    string Text = Value.ToString(CultureInfo.InvariantCulture);

    if (Value >= 0)
    {
        return Text;
    }

    return "(" + Text + ")";
}
//Formats a float literal using the invariant culture.
//Only values that compare less than zero are wrapped in parentheses.
private static string GetFloatConst(float Value)
{
    string Text = Value.ToString(CultureInfo.InvariantCulture);

    //Keep the "< 0" test (rather than ">= 0") so NaN stays unparenthesized.
    if (Value < 0)
    {
        return "(" + Text + ")";
    }

    return Text;
}
private static OperType GetDstNodeType(ShaderIrNode Node)
//Special case instructions with the result type different
@ -1091,8 +1223,9 @@ namespace Ryujinx.Graphics.Gal.Shader
switch (Node)
case ShaderIrOperAbuf Abuf:
return Abuf.Offs == GlslDecl.VertexIdAttr ||
return Abuf.Offs == GlslDecl.LayerAttr ||
Abuf.Offs == GlslDecl.InstanceIdAttr ||
Abuf.Offs == GlslDecl.VertexIdAttr ||
Abuf.Offs == GlslDecl.FaceAttr
? OperType.I32
: OperType.F32;
@ -144,6 +144,50 @@ namespace Ryujinx.Graphics.Gal.Shader
EmitFsetp(Block, OpCode, ShaderOper.RR);
//IADD whose second operand is selected by ShaderOper.CR.
public static void Iadd_C(ShaderIrBlock Block, long OpCode) => EmitIadd(Block, OpCode, ShaderOper.CR);
//IADD whose second operand is selected by ShaderOper.Imm.
public static void Iadd_I(ShaderIrBlock Block, long OpCode) => EmitIadd(Block, OpCode, ShaderOper.Imm);
//IADD with the operand decoded by GetOperImm32_20: Gpr0 = (+/-)Gpr8 + Imm.
//Bit 56 of the opcode requests integer negation of the register operand.
public static void Iadd_I32(ShaderIrBlock Block, long OpCode)
{
    ShaderIrNode Lhs = GetOperGpr8(OpCode);
    ShaderIrNode Rhs = GetOperImm32_20(OpCode);

    bool NegateLhs = ((OpCode >> 56) & 1) != 0;

    ShaderIrOp Sum = new ShaderIrOp(ShaderIrInst.Add, GetAluIneg(Lhs, NegateLhs), Rhs);

    ShaderIrAsg Store = new ShaderIrAsg(GetOperGpr0(OpCode), Sum);

    Block.AddNode(GetPredNode(Store, OpCode));
}
//IADD whose second operand is selected by ShaderOper.RR.
public static void Iadd_R(ShaderIrBlock Block, long OpCode) => EmitIadd(Block, OpCode, ShaderOper.RR);
//IADD3 variants; each only selects where the second operand is decoded from.
public static void Iadd3_C(ShaderIrBlock Block, long OpCode) => EmitIadd3(Block, OpCode, ShaderOper.CR);
public static void Iadd3_I(ShaderIrBlock Block, long OpCode) => EmitIadd3(Block, OpCode, ShaderOper.Imm);
public static void Iadd3_R(ShaderIrBlock Block, long OpCode) => EmitIadd3(Block, OpCode, ShaderOper.RR);
public static void Imnmx_C(ShaderIrBlock Block, long OpCode)
EmitImnmx(Block, OpCode, ShaderOper.CR);
@ -184,6 +228,21 @@ namespace Ryujinx.Graphics.Gal.Shader
EmitIscadd(Block, OpCode, ShaderOper.RR);
//ISET variants; each forwards to EmitIset with the matching operand kind.
public static void Iset_C(ShaderIrBlock Block, long OpCode) => EmitIset(Block, OpCode, ShaderOper.CR);
public static void Iset_I(ShaderIrBlock Block, long OpCode) => EmitIset(Block, OpCode, ShaderOper.Imm);
public static void Iset_R(ShaderIrBlock Block, long OpCode) => EmitIset(Block, OpCode, ShaderOper.RR);
public static void Isetp_C(ShaderIrBlock Block, long OpCode)
EmitIsetp(Block, OpCode, ShaderOper.CR);
@ -215,13 +274,13 @@ namespace Ryujinx.Graphics.Gal.Shader
case 2: Inst = ShaderIrInst.Xor; break;
ShaderIrNode OperA = GetAluNot(GetOperGpr8(OpCode), InvA);
ShaderIrNode OperB = GetAluNot(GetOperImm32_20(OpCode), InvB);
//SubOp == 3 is pass, used by the not instruction
//which just moves the inverted register value.
if (SubOp < 3)
ShaderIrNode OperB = GetAluNot(GetOperImm32_20(OpCode), InvB);
ShaderIrNode OperA = GetAluNot(GetOperGpr8(OpCode), InvA);
ShaderIrOp Op = new ShaderIrOp(Inst, OperA, OperB);
@ -229,10 +288,25 @@ namespace Ryujinx.Graphics.Gal.Shader
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), OperA), OpCode));
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), OperB), OpCode));
//LOP variants; each forwards to EmitLop with the matching operand kind.
public static void Lop_C(ShaderIrBlock Block, long OpCode) => EmitLop(Block, OpCode, ShaderOper.CR);
public static void Lop_I(ShaderIrBlock Block, long OpCode) => EmitLop(Block, OpCode, ShaderOper.Imm);
public static void Lop_R(ShaderIrBlock Block, long OpCode) => EmitLop(Block, OpCode, ShaderOper.RR);
public static void Mufu(ShaderIrBlock Block, long OpCode)
int SubOp = (int)(OpCode >> 20) & 0xf;
@ -368,6 +442,41 @@ namespace Ryujinx.Graphics.Gal.Shader
return Signed ? ShaderIrInst.Asr : ShaderIrInst.Lsr;
//Stub for VMAD: the instruction is reduced to Gpr0 = Gpr8 * B + Gpr39,
//optionally followed by a logical shift right of the result.
public static void Vmad(ShaderIrBlock Block, long OpCode)
ShaderIrNode OperA = GetOperGpr8(OpCode);
ShaderIrNode OperB;
//Bit 50 selects where the second multiplicand comes from.
//NOTE(review): the else keyword is not visible in this view; presumably the
//register form is used when the bit is set and the immediate form otherwise.
if (((OpCode >> 50) & 1) != 0)
OperB = GetOperGpr20(OpCode);
OperB = GetOperImm19_20(OpCode);
ShaderIrOperGpr OperC = GetOperGpr39(OpCode);
ShaderIrNode Tmp = new ShaderIrOp(ShaderIrInst.Mul, OperA, OperB);
ShaderIrNode Final = new ShaderIrOp(ShaderIrInst.Add, Tmp, OperC);
//Bits 51-52 hold the shift mode: 0 means no shift, 2 shifts by 15, 1 and 3 shift by 7.
int Shr = (int)((OpCode >> 51) & 3);
if (Shr != 0)
int Shift = (Shr == 2) ? 15 : 7;
Final = new ShaderIrOp(ShaderIrInst.Lsr, Final, new ShaderIrOperImm(Shift));
//The comment node marks the stub in the generated output.
Block.AddNode(new ShaderIrCmnt("Stubbed. Instruction is reduced to a * b + c"));
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Final), OpCode));
public static void Xmad_CR(ShaderIrBlock Block, long OpCode)
EmitXmad(Block, OpCode, ShaderOper.CR);
@ -533,6 +642,92 @@ namespace Ryujinx.Graphics.Gal.Shader
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Op), OpCode));
//Emits Gpr0 = (+/-)Gpr8 + (+/-)B for the IADD encodings.
//B is decoded with GetOperCbuf34 (CR), GetOperImm19_20 (Imm) or GetOperGpr20 (RR).
//Throws ArgumentException for any other Oper value.
private static void EmitIadd(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
{
    ShaderIrNode Lhs = GetOperGpr8(OpCode);
    ShaderIrNode Rhs;

    if (Oper == ShaderOper.CR)
    {
        Rhs = GetOperCbuf34(OpCode);
    }
    else if (Oper == ShaderOper.Imm)
    {
        Rhs = GetOperImm19_20(OpCode);
    }
    else if (Oper == ShaderOper.RR)
    {
        Rhs = GetOperGpr20(OpCode);
    }
    else
    {
        throw new ArgumentException(nameof(Oper));
    }

    //Bit 49 negates the first operand, bit 48 negates the second one.
    Lhs = GetAluIneg(Lhs, ((OpCode >> 49) & 1) != 0);
    Rhs = GetAluIneg(Rhs, ((OpCode >> 48) & 1) != 0);

    ShaderIrOp Sum = new ShaderIrOp(ShaderIrInst.Add, Lhs, Rhs);

    ShaderIrAsg Store = new ShaderIrAsg(GetOperGpr0(OpCode), Sum);

    Block.AddNode(GetPredNode(Store, OpCode));
}
//Emits the three-input integer add: Gpr0 = ((+/-)Src1 + (+/-)Src2) + (+/-)Src3.
//Src1 is Gpr8, Src2 is selected by Oper, Src3 is Gpr39.
//Throws ArgumentException for an unknown Oper and InvalidOperationException
//for a height selector of 3 in the register form.
private static void EmitIadd3(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
//Bits 37-38: post-processing applied to the first partial sum (register form only).
int Mode = (int)((OpCode >> 37) & 3);
//Bits 51, 50 and 49: negate source 1, 2 and 3 respectively.
bool Neg1 = ((OpCode >> 51) & 1) != 0;
bool Neg2 = ((OpCode >> 50) & 1) != 0;
bool Neg3 = ((OpCode >> 49) & 1) != 0;
//Two-bit selectors choosing which part of each source is used (see ApplyHeight).
int Height1 = (int)((OpCode >> 35) & 3);
int Height2 = (int)((OpCode >> 33) & 3);
int Height3 = (int)((OpCode >> 31) & 3);
ShaderIrNode OperB;
switch (Oper)
case ShaderOper.CR: OperB = GetOperCbuf34 (OpCode); break;
case ShaderOper.Imm: OperB = GetOperImm19_20(OpCode); break;
case ShaderOper.RR: OperB = GetOperGpr20 (OpCode); break;
default: throw new ArgumentException(nameof(Oper));
//Height selection only applies to the register form; other forms pass Src through.
//0 keeps the value, 1 masks the low 16 bits, 2 shifts the high 16 bits down.
ShaderIrNode ApplyHeight(ShaderIrNode Src, int Height)
if (Oper != ShaderOper.RR)
return Src;
switch (Height)
case 0: return Src;
case 1: return new ShaderIrOp(ShaderIrInst.And, Src, new ShaderIrOperImm(0xffff));
case 2: return new ShaderIrOp(ShaderIrInst.Lsr, Src, new ShaderIrOperImm(16));
default: throw new InvalidOperationException();
//Height selection is applied before negation for every source.
ShaderIrNode Src1 = GetAluIneg(ApplyHeight(GetOperGpr8(OpCode), Height1), Neg1);
ShaderIrNode Src2 = GetAluIneg(ApplyHeight(OperB, Height2), Neg2);
ShaderIrNode Src3 = GetAluIneg(ApplyHeight(GetOperGpr39(OpCode), Height3), Neg3);
ShaderIrOp Sum = new ShaderIrOp(ShaderIrInst.Add, Src1, Src2);
//Register form only: mode 1 shifts the partial sum right by 16, mode 2 shifts it left by 16.
if (Oper == ShaderOper.RR)
switch (Mode)
case 1: Sum = new ShaderIrOp(ShaderIrInst.Lsr, Sum, new ShaderIrOperImm(16)); break;
case 2: Sum = new ShaderIrOp(ShaderIrInst.Lsl, Sum, new ShaderIrOperImm(16)); break;
//Note: Here there should be a "+ 1" when carry flag is set
//but since carry is mostly ignored by other instructions, it's excluded for now
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), new ShaderIrOp(ShaderIrInst.Add, Sum, Src3)), OpCode));
private static void EmitIscadd(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
bool NegB = ((OpCode >> 48) & 1) != 0;
@ -659,6 +854,8 @@ namespace Ryujinx.Graphics.Gal.Shader
OperA = GetAluFabsFneg(OperA, AbsA, NegA);
Block.AddNode(new ShaderIrCmnt("Stubbed."));
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), OperA), OpCode));
@ -676,10 +873,11 @@ namespace Ryujinx.Graphics.Gal.Shader
bool NegA = ((OpCode >> 43) & 1) != 0;
bool AbsB = ((OpCode >> 44) & 1) != 0;
bool BoolFloat = ((OpCode >> 52) & 1) != 0;
bool NegB = ((OpCode >> 53) & 1) != 0;
bool AbsA = ((OpCode >> 54) & 1) != 0;
bool BoolFloat = ((OpCode >> (IsFloat ? 52 : 44)) & 1) != 0;
ShaderIrNode OperA = GetOperGpr8(OpCode), OperB;
switch (Oper)
@ -821,6 +1019,54 @@ namespace Ryujinx.Graphics.Gal.Shader
Block.AddNode(GetPredNode(new ShaderIrAsg(P0Node, Op), OpCode));
//Emits the logic operation (and/or/xor/pass) for the LOP encodings.
//The result is stored in Gpr0 and the predicate from GetOperPred48 receives "result != 0".
//Throws ArgumentException for an unknown Oper.
private static void EmitLop(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
//Bits 41-42 select the operation; bits 39 and 40 invert operand A and B.
int SubOp = (int)(OpCode >> 41) & 3;
bool InvA = ((OpCode >> 39) & 1) != 0;
bool InvB = ((OpCode >> 40) & 1) != 0;
//Inst keeps its default of 0 when SubOp is 3, where it is not meant to be used.
ShaderIrInst Inst = 0;
switch (SubOp)
case 0: Inst = ShaderIrInst.And; break;
case 1: Inst = ShaderIrInst.Or; break;
case 2: Inst = ShaderIrInst.Xor; break;
ShaderIrNode OperA = GetAluNot(GetOperGpr8(OpCode), InvA);
ShaderIrNode OperB;
switch (Oper)
case ShaderOper.CR: OperB = GetOperCbuf34 (OpCode); break;
case ShaderOper.Imm: OperB = GetOperImm19_20(OpCode); break;
case ShaderOper.RR: OperB = GetOperGpr20 (OpCode); break;
default: throw new ArgumentException(nameof(Oper));
OperB = GetAluNot(OperB, InvB);
ShaderIrNode Op;
//NOTE(review): the else keyword is not visible in this view; presumably SubOp == 3
//is the pass-through case where the (possibly inverted) operand B is the result.
if (SubOp < 3)
Op = new ShaderIrOp(Inst, OperA, OperB);
Op = OperB;
//The predicate is set from comparing the result against zero.
ShaderIrNode Compare = new ShaderIrOp(ShaderIrInst.Cne, Op, new ShaderIrOperImm(0));
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperPred48(OpCode), Compare), OpCode));
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Op), OpCode));
private static void EmitXmad(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
//TODO: Confirm SignAB/C, it is just a guess.
@ -7,14 +7,15 @@ namespace Ryujinx.Graphics.Gal.Shader
public static ShaderIrOperAbuf[] GetOperAbuf20(long OpCode)
int Abuf = (int)(OpCode >> 20) & 0x3ff;
int Reg = (int)(OpCode >> 39) & 0xff;
int Size = (int)(OpCode >> 47) & 3;
ShaderIrOperGpr Vertex = GetOperGpr39(OpCode);
ShaderIrOperAbuf[] Opers = new ShaderIrOperAbuf[Size + 1];
for (int Index = 0; Index <= Size; Index++)
Opers[Index] = new ShaderIrOperAbuf(Abuf + Index * 4, Reg);
Opers[Index] = new ShaderIrOperAbuf(Abuf + Index * 4, Vertex);
return Opers;
@ -23,9 +24,8 @@ namespace Ryujinx.Graphics.Gal.Shader
public static ShaderIrOperAbuf GetOperAbuf28(long OpCode)
int Abuf = (int)(OpCode >> 28) & 0x3ff;
int Reg = (int)(OpCode >> 39) & 0xff;
return new ShaderIrOperAbuf(Abuf, Reg);
return new ShaderIrOperAbuf(Abuf, GetOperGpr39(OpCode));
public static ShaderIrOperCbuf GetOperCbuf34(long OpCode)
@ -156,6 +156,11 @@ namespace Ryujinx.Graphics.Gal.Shader
return new ShaderIrOperPred((int)(OpCode >> 39) & 7);
//Builds a predicate operand from the three-bit index stored at bits 48-50 of the opcode.
public static ShaderIrOperPred GetOperPred48(long OpCode) => new ShaderIrOperPred((int)((OpCode >> 48) & 7));
public static ShaderIrInst GetCmp(long OpCode)
switch ((int)(OpCode >> 49) & 7)
@ -35,6 +35,9 @@ namespace Ryujinx.Graphics.Gal.Shader
ShaderIrNode[] Opers = GetOperAbuf20(OpCode);
//Used by GS
ShaderIrOperGpr Vertex = GetOperGpr39(OpCode);
int Index = 0;
foreach (ShaderIrNode OperA in Opers)
@ -85,6 +85,16 @@ namespace Ryujinx.Graphics.Gal.Shader
EmitI2i(Block, OpCode, ShaderOper.RR);
//Stub for ISBERD: emits a comment node followed by a predicated Gpr0 = Gpr8 copy.
public static void Isberd(ShaderIrBlock Block, long OpCode)
{
    //This instruction seems to be used to translate from an address to a vertex index in a GS
    //Stub it as such
    Block.AddNode(new ShaderIrCmnt("Stubbed."));

    ShaderIrAsg Copy = new ShaderIrAsg(GetOperGpr0(OpCode), GetOperGpr8(OpCode));

    Block.AddNode(GetPredNode(Copy, OpCode));
}
public static void Mov_C(ShaderIrBlock Block, long OpCode)
ShaderIrOperCbuf Cbuf = GetOperCbuf34(OpCode);
@ -113,6 +123,31 @@ namespace Ryujinx.Graphics.Gal.Shader
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Gpr), OpCode));
//SEL variants; each forwards to EmitSel with the matching operand kind.
public static void Sel_C(ShaderIrBlock Block, long OpCode) => EmitSel(Block, OpCode, ShaderOper.CR);
public static void Sel_I(ShaderIrBlock Block, long OpCode) => EmitSel(Block, OpCode, ShaderOper.Imm);
public static void Sel_R(ShaderIrBlock Block, long OpCode) => EmitSel(Block, OpCode, ShaderOper.RR);
//Stub for MOV_S: emits a comment node followed by a predicated Gpr0 = 0 assignment.
public static void Mov_S(ShaderIrBlock Block, long OpCode)
{
    Block.AddNode(new ShaderIrCmnt("Stubbed."));

    //Zero is used as a special number to get a valid "0 * 0 + VertexIndex" in a GS
    ShaderIrNode Zero = new ShaderIrOperImm(0);

    ShaderIrAsg Store = new ShaderIrAsg(GetOperGpr0(OpCode), Zero);

    Block.AddNode(GetPredNode(Store, OpCode));
}
private static void EmitF2f(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
bool NegA = ((OpCode >> 45) & 1) != 0;
@ -340,6 +375,28 @@ namespace Ryujinx.Graphics.Gal.Shader
Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), OperA), OpCode));
//Emits SEL as two conditional assignments to Gpr0 sharing one predicate node:
//the first assigns Gpr8 with the final ShaderIrCond argument false, the second
//assigns the Oper-selected operand with that argument true.
//Throws ArgumentException for an unknown Oper.
private static void EmitSel(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
{
    ShaderIrOperGpr Target = GetOperGpr0(OpCode);
    ShaderIrNode Selector = GetOperPred39N(OpCode);

    ShaderIrNode First = GetOperGpr8(OpCode);
    ShaderIrNode Second;

    if (Oper == ShaderOper.CR)
    {
        Second = GetOperCbuf34(OpCode);
    }
    else if (Oper == ShaderOper.Imm)
    {
        Second = GetOperImm19_20(OpCode);
    }
    else if (Oper == ShaderOper.RR)
    {
        Second = GetOperGpr20(OpCode);
    }
    else
    {
        throw new ArgumentException(nameof(Oper));
    }

    ShaderIrCond PickFirst = new ShaderIrCond(Selector, new ShaderIrAsg(Target, First), false);

    Block.AddNode(GetPredNode(PickFirst, OpCode));

    ShaderIrCond PickSecond = new ShaderIrCond(Selector, new ShaderIrAsg(Target, Second), true);

    Block.AddNode(GetPredNode(PickSecond, OpCode));
}
private static IntType GetIntType(long OpCode)
bool Signed = ((OpCode >> 13) & 1) != 0;
Normal file
Normal file
@ -0,0 +1,29 @@
using System;
using static Ryujinx.Graphics.Gal.Shader.ShaderDecodeHelper;
namespace Ryujinx.Graphics.Gal.Shader
static partial class ShaderDecode
//Decodes OUT (register form). Bits 39-40 are flags: bit 0 adds an Emit node and
//bit 1 adds a Cut node, in that order, each wrapped by GetPredNode.
public static void Out_R(ShaderIrBlock Block, long OpCode)
{
    //TODO: Those registers have to be used for something
    ShaderIrOperGpr Gpr0 = GetOperGpr0(OpCode);
    ShaderIrOperGpr Gpr8 = GetOperGpr8(OpCode);
    ShaderIrOperGpr Gpr20 = GetOperGpr20(OpCode);

    int Flags = (int)((OpCode >> 39) & 3);

    bool EmitsVertex = (Flags & 1) != 0;
    bool CutsPrimitive = (Flags & 2) != 0;

    if (EmitsVertex)
    {
        Block.AddNode(GetPredNode(new ShaderIrOp(ShaderIrInst.Emit), OpCode));
    }

    if (CutsPrimitive)
    {
        Block.AddNode(GetPredNode(new ShaderIrOp(ShaderIrInst.Cut), OpCode));
    }
}
@ -4,6 +4,8 @@ namespace Ryujinx.Graphics.Gal.Shader
static class ShaderDecoder
private const long HeaderSize = 0x50;
private const bool AddDbgComments = true;
public static ShaderIrBlock[] Decode(IGalMemory Memory, long Start)
@ -32,13 +34,13 @@ namespace Ryujinx.Graphics.Gal.Shader
return Output;
ShaderIrBlock Entry = Enqueue(Start);
ShaderIrBlock Entry = Enqueue(Start + HeaderSize);
while (Blocks.Count > 0)
ShaderIrBlock Current = Blocks.Dequeue();
FillBlock(Memory, Current);
FillBlock(Memory, Current, Start + HeaderSize);
//Set child blocks. "Branch" is the block the branch instruction
//points to (when taken), "Next" is the block at the next address,
@ -122,14 +124,14 @@ namespace Ryujinx.Graphics.Gal.Shader
return Graph;
private static void FillBlock(IGalMemory Memory, ShaderIrBlock Block)
private static void FillBlock(IGalMemory Memory, ShaderIrBlock Block, long Beginning)
long Position = Block.Position;
//Ignore scheduling instructions, which are written every 32 bytes.
if ((Position & 0x1f) == 0)
if (((Position - Beginning) & 0x1f) == 0)
Position += 8;
@ -147,7 +149,7 @@ namespace Ryujinx.Graphics.Gal.Shader
if (AddDbgComments)
string DbgOpCode = $"0x{(Position - 8):x16}: 0x{OpCode:x16} ";
string DbgOpCode = $"0x{(Position - Beginning - 8):x16}: 0x{OpCode:x16} ";
DbgOpCode += (Decode?.Method.Name ?? "???");
Normal file
Normal file
@ -0,0 +1,73 @@
namespace Ryujinx.Graphics.Gal.Shader
class ShaderHeader
//Topology codes; presumably the values OutputTopology can take (confirm against callers).
public const int PointList = 1;
public const int LineStrip = 6;
public const int TriangleStrip = 7;
//Fields decoded from common word 0 of the header.
public int SphType { get; private set; }
public int Version { get; private set; }
public int ShaderType { get; private set; }
public bool MrtEnable { get; private set; }
public bool KillsPixels { get; private set; }
public bool DoesGlobalStore { get; private set; }
public int SassVersion { get; private set; }
public bool DoesLoadOrStore { get; private set; }
public bool DoesFp64 { get; private set; }
public int StreamOutMask { get; private set; }
//Fields decoded from common word 1.
public int ShaderLocalMemoryLowSize { get; private set; }
public int PerPatchAttributeCount { get; private set; }
//Fields decoded from common word 2.
public int ShaderLocalMemoryHighSize { get; private set; }
public int ThreadsPerInputPrimitive { get; private set; }
//Fields decoded from common word 3.
public int ShaderLocalMemoryCrsSize { get; private set; }
public int OutputTopology { get; private set; }
//Fields decoded from common word 4.
public int MaxOutputVertexCount { get; private set; }
public int StoreReqStart { get; private set; }
public int StoreReqEnd { get; private set; }
//Reads the five 32-bit common words starting at Position and unpacks every
//header field from its bit range.
public ShaderHeader(IGalMemory Memory, long Position)
{
    //Word N lives at byte offset N * 4; the reads happen in ascending order.
    uint ReadWord(int Index) => (uint)Memory.ReadInt32(Position + Index * 4);

    uint Word0 = ReadWord(0);
    uint Word1 = ReadWord(1);
    uint Word2 = ReadWord(2);
    uint Word3 = ReadWord(3);
    uint Word4 = ReadWord(4);

    //Word 0. Bits 21-25 are not read.
    SphType = ReadBits(Word0, 0, 5);
    Version = ReadBits(Word0, 5, 5);
    ShaderType = ReadBits(Word0, 10, 4);
    MrtEnable = ReadBits(Word0, 14, 1) != 0;
    KillsPixels = ReadBits(Word0, 15, 1) != 0;
    DoesGlobalStore = ReadBits(Word0, 16, 1) != 0;
    SassVersion = ReadBits(Word0, 17, 4);
    DoesLoadOrStore = ReadBits(Word0, 26, 1) != 0;
    DoesFp64 = ReadBits(Word0, 27, 1) != 0;
    StreamOutMask = ReadBits(Word0, 28, 4);

    //Word 1.
    ShaderLocalMemoryLowSize = ReadBits(Word1, 0, 24);
    PerPatchAttributeCount = ReadBits(Word1, 24, 8);

    //Word 2.
    ShaderLocalMemoryHighSize = ReadBits(Word2, 0, 24);
    ThreadsPerInputPrimitive = ReadBits(Word2, 24, 8);

    //Word 3. Bits 28-31 are not read.
    ShaderLocalMemoryCrsSize = ReadBits(Word3, 0, 24);
    OutputTopology = ReadBits(Word3, 24, 4);

    //Word 4. Bits 20-23 are not read.
    MaxOutputVertexCount = ReadBits(Word4, 0, 12);
    StoreReqStart = ReadBits(Word4, 12, 8);
    StoreReqEnd = ReadBits(Word4, 24, 8);
}
//Extracts BitWidth bits of Word starting at bit Offset (bit 0 is the least significant).
//A BitWidth of 32 or more selects every remaining bit. This case is handled
//explicitly because C# masks the shift count of a 32-bit operand to 5 bits,
//so "1u << 32" is 1 and the computed mask would be 0.
private static int ReadBits(uint Word, int Offset, int BitWidth)
{
    uint Mask = BitWidth >= 32 ? uint.MaxValue : (1u << BitWidth) - 1u;

    //The cast is explicitly unchecked: a full-width read may set the sign bit.
    return unchecked((int)((Word >> Offset) & Mask));
}
@ -82,6 +82,9 @@ namespace Ryujinx.Graphics.Gal.Shader
@ -3,12 +3,13 @@ namespace Ryujinx.Graphics.Gal.Shader
class ShaderIrOperAbuf : ShaderIrNode
public int Offs { get; private set; }
public int GprIndex { get; private set; }
public ShaderIrOperAbuf(int Offs, int GprIndex)
public ShaderIrNode Vertex { get; private set; }
public ShaderIrOperAbuf(int Offs, ShaderIrNode Vertex)
this.Offs = Offs;
this.GprIndex = GprIndex;
this.Vertex = Vertex;
@ -64,29 +64,48 @@ namespace Ryujinx.Graphics.Gal.Shader
Set("0100110011100x", ShaderDecode.I2i_C);
Set("0011100x11100x", ShaderDecode.I2i_I);
Set("0101110011100x", ShaderDecode.I2i_R);
Set("0100110000010x", ShaderDecode.Iadd_C);
Set("0011100000010x", ShaderDecode.Iadd_I);
Set("0001110x0xxxxx", ShaderDecode.Iadd_I32);
Set("0101110000010x", ShaderDecode.Iadd_R);
Set("010011001100xx", ShaderDecode.Iadd3_C);
Set("001110001100xx", ShaderDecode.Iadd3_I);
Set("010111001100xx", ShaderDecode.Iadd3_R);
Set("0100110000100x", ShaderDecode.Imnmx_C);
Set("0011100x00100x", ShaderDecode.Imnmx_I);
Set("0101110000100x", ShaderDecode.Imnmx_R);
Set("1110111111010x", ShaderDecode.Isberd);
Set("11100000xxxxxx", ShaderDecode.Ipa);
Set("0100110000011x", ShaderDecode.Iscadd_C);
Set("0011100x00011x", ShaderDecode.Iscadd_I);
Set("0101110000011x", ShaderDecode.Iscadd_R);
Set("010010110101xx", ShaderDecode.Iset_C);
Set("001101100101xx", ShaderDecode.Iset_I);
Set("010110110101xx", ShaderDecode.Iset_R);
Set("010010110110xx", ShaderDecode.Isetp_C);
Set("0011011x0110xx", ShaderDecode.Isetp_I);
Set("010110110110xx", ShaderDecode.Isetp_R);
Set("111000110011xx", ShaderDecode.Kil);
Set("1110111111011x", ShaderDecode.Ld_A);
Set("1110111110010x", ShaderDecode.Ld_C);
Set("0100110001000x", ShaderDecode.Lop_C);
Set("0011100001000x", ShaderDecode.Lop_I);
Set("000001xxxxxxxx", ShaderDecode.Lop_I32);
Set("0101110001000x", ShaderDecode.Lop_R);
Set("0100110010011x", ShaderDecode.Mov_C);
Set("0011100x10011x", ShaderDecode.Mov_I);
Set("000000010000xx", ShaderDecode.Mov_I32);
Set("0101110010011x", ShaderDecode.Mov_R);
Set("1111000011001x", ShaderDecode.Mov_S);
Set("0101000010000x", ShaderDecode.Mufu);
Set("1111101111100x", ShaderDecode.Out_R);
Set("0101000010010x", ShaderDecode.Psetp);
Set("0100110010010x", ShaderDecode.Rro_C);
Set("0011100x10010x", ShaderDecode.Rro_I);
Set("0101110010010x", ShaderDecode.Rro_R);
Set("0100110010100x", ShaderDecode.Sel_C);
Set("0011100010100x", ShaderDecode.Sel_I);
Set("0101110010100x", ShaderDecode.Sel_R);
Set("0100110001001x", ShaderDecode.Shl_C);
Set("0011100x01001x", ShaderDecode.Shl_I);
Set("0101110001001x", ShaderDecode.Shl_R);
@ -98,6 +117,7 @@ namespace Ryujinx.Graphics.Gal.Shader
Set("1101111101001x", ShaderDecode.Texq);
Set("1101100xxxxxxx", ShaderDecode.Texs);
Set("1101101xxxxxxx", ShaderDecode.Tlds);
Set("01011111xxxxxx", ShaderDecode.Vmad);
Set("0100111xxxxxxx", ShaderDecode.Xmad_CR);
Set("0011011x00xxxx", ShaderDecode.Xmad_I);
Set("010100010xxxxx", ShaderDecode.Xmad_RC);
Normal file
Normal file
@ -0,0 +1,126 @@
using System;
using System.IO;
namespace Ryujinx.Graphics.Gal
static class ShaderDumper
//Dump directory chosen for this run; filled in by DumpDir on first use.
private static string RuntimeDir;
//Number embedded in the next dump file name (formatted with "d4" by Dump).
private static int DumpIndex = 1;
//Dumps the shader located at Position into a "Full" and a "Code" file whose name
//is built from DumpIndex, the shader type extension and ExtSuffix.
//The check below is on GraphicsConfig.ShadersDumpPath being null, empty or whitespace.
//NOTE(review): braces and single-token statements (early return, break, the writer
//calls for the instruction words, counter increments) are not visible in this view;
//the visible lines are kept verbatim apart from the shift fix below.
public static void Dump(IGalMemory Memory, long Position, GalShaderType Type, string ExtSuffix = "")
if (string.IsNullOrWhiteSpace(GraphicsConfig.ShadersDumpPath))
string FileName = "Shader" + DumpIndex.ToString("d4") + "." + ShaderExtension(Type) + ExtSuffix + ".bin";
string FullPath = Path.Combine(FullDir(), FileName);
string CodePath = Path.Combine(CodeDir(), FileName);
using (FileStream FullFile = File.Create(FullPath))
using (FileStream CodeFile = File.Create(CodePath))
using (BinaryWriter FullWriter = new BinaryWriter(FullFile))
using (BinaryWriter CodeWriter = new BinaryWriter(CodeFile))
//The first 0x50 bytes are the shader header; they only go to the "Full" file.
for (long i = 0; i < 0x50; i += 4)
FullWriter.Write(Memory.ReadInt32(Position + i));
long Offset = 0;
ulong Instruction = 0;
//Dump until a NOP instruction is found
//The top 16 bits are compared with the low 3 bits masked off. The shift has to
//be 48: shifting by 52 leaves only 12 bits, which can never equal 0x50b0.
while ((Instruction >> 48 & 0xfff8) != 0x50b0)
uint Word0 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 0);
uint Word1 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 4);
//Instructions are 64 bits wide, stored as two little-endian ordered 32-bit words.
Instruction = Word0 | (ulong)Word1 << 32;
//Zero instructions (other kind of NOP) stop immediately,
//this is to avoid two rows of zeroes
if (Instruction == 0)
Offset += 8;
//Align to meet nvdisasm requirements
while (Offset % 0x20 != 0)
Offset += 4;
//Directory for dumps that include the header: the "Full" folder under DumpDir().
private static string FullDir() => CreateAndReturn(Path.Combine(DumpDir(), "Full"));
//Directory for code-only dumps: the "Code" folder under DumpDir().
private static string CodeDir() => CreateAndReturn(Path.Combine(DumpDir(), "Code"));
//Returns the dump directory for this run, choosing it on the first call and
//caching it in RuntimeDir afterwards.
private static string DumpDir()
if (string.IsNullOrEmpty(RuntimeDir))
int Index = 1;
//Candidate names are "Dumps" followed by a two-digit index under ShadersDumpPath.
//NOTE(review): the "do" keyword and the index increment are not visible in this
//view; presumably the loop advances Index until an unused directory name is found.
RuntimeDir = Path.Combine(GraphicsConfig.ShadersDumpPath, "Dumps" + Index.ToString("d2"));
while (Directory.Exists(RuntimeDir));
return RuntimeDir;
//Returns Dir after checking whether it exists.
//NOTE(review): the statement guarded by the existence check is not visible in this
//view; judging by the method name it presumably creates the directory - confirm.
private static string CreateAndReturn(string Dir)
if (!Directory.Exists(Dir))
return Dir;
//Maps a shader stage to the short extension used in dump file names.
//Throws ArgumentException for a stage that has no mapping.
private static string ShaderExtension(GalShaderType Type)
{
    if (Type == GalShaderType.Vertex)
    {
        return "vert";
    }

    if (Type == GalShaderType.TessControl)
    {
        return "tesc";
    }

    if (Type == GalShaderType.TessEvaluation)
    {
        return "tese";
    }

    if (Type == GalShaderType.Geometry)
    {
        return "geom";
    }

    if (Type == GalShaderType.Fragment)
    {
        return "frag";
    }

    throw new ArgumentException(nameof(Type));
}
Normal file
Normal file
@ -0,0 +1,4 @@
//Process-wide graphics settings.
public static class GraphicsConfig
//Base directory for shader dumps; ShaderDumper.Dump checks it for null, empty or whitespace.
public static string ShadersDumpPath;
@ -21,13 +21,4 @@
<ProjectReference Include="..\ChocolArm64\ChocolArm64.csproj" />
<EmbeddedResource Include="Gal\OpenGL\FbVtxShader.glsl">
<EmbeddedResource Include="Gal\OpenGL\FbFragShader.glsl">
Normal file
Normal file
@ -0,0 +1,177 @@
using ChocolArm64.Exceptions;
using ChocolArm64.Memory;
using Ryujinx.HLE.Logging;
using Ryujinx.HLE.OsHle;
using Ryujinx.HLE.OsHle.Handles;
using Ryujinx.HLE.Resource;
using System;
using System.Collections.Generic;
using System.IO;
namespace Ryujinx.HLE.Font
public class SharedFontManager
//Upper bound checked against the total font data size in the constructor.
private const uint SharedMemorySize = 0x1100000;
private Logger Log;
//Directory the .ttf files are loaded from (SystemPath joined with "fonts").
private string FontsPath;
//Guards ShMemPositions.
private object ShMemLock;
//Current mappings as returned by HSharedMem.GetVirtualPositions: (memory, position, size).
private (AMemory, long, long)[] ShMemPositions;
//Raw font file contents, keyed by font type.
private Dictionary<SharedFontType, byte[]> FontData;
//Per-font load state indexed by (int)SharedFontType; 0 and 1 are the values used here.
private uint[] LoadedFonts;
//Loads every shared font from SystemPath/fonts and checks that the data fits
//in the shared memory region.
//Throws InvalidSystemResourceException when a font file is missing (via GetData)
//or when the accumulated size exceeds SharedMemorySize.
public SharedFontManager(Logger Log, string SystemPath)
this.Log = Log;
this.FontsPath = Path.Combine(SystemPath, "fonts");
ShMemLock = new object();
//Starts with no mappings; replaced by ShMemMap/ShMemUnmap.
ShMemPositions = new(AMemory, long, long)[0];
//FontsPath must be assigned before this point because GetData reads it.
FontData = new Dictionary<SharedFontType, byte[]>()
{ SharedFontType.JapanUsEurope, GetData("FontStandard") },
{ SharedFontType.SimplifiedChinese, GetData("FontChineseSimplified") },
{ SharedFontType.SimplifiedChineseEx, GetData("FontExtendedChineseSimplified") },
{ SharedFontType.TraditionalChinese, GetData("FontChineseTraditional") },
{ SharedFontType.Korean, GetData("FontKorean") },
{ SharedFontType.NintendoEx, GetData("FontNintendoExtended") }
int FontMemoryUsage = 0;
//NOTE(review): braces are not visible in this view; presumably both additions
//below run per font (data length plus 8 extra bytes) - confirm.
foreach (byte[] data in FontData.Values)
FontMemoryUsage += data.Length;
FontMemoryUsage += 0x8;
if (FontMemoryUsage > SharedMemorySize)
throw new InvalidSystemResourceException($"The sum of all fonts size exceed the shared memory size. Please make sure that the fonts don't exceed {SharedMemorySize} bytes in total. (actual size: {FontMemoryUsage} bytes)");
//One state slot per font; all start at 0.
LoadedFonts = new uint[FontData.Count];
//Reads "<FontName>.ttf" from FontsPath and returns its bytes.
//Throws InvalidSystemResourceException when the file does not exist.
public byte[] GetData(string FontName)
{
    string FontFilePath = Path.Combine(FontsPath, $"{FontName}.ttf");

    if (!File.Exists(FontFilePath))
    {
        throw new InvalidSystemResourceException($"Font \"{FontName}.ttf\" not found. Please provide it in \"{FontsPath}\".");
    }

    return File.ReadAllBytes(FontFilePath);
}
//Copies one font into guest memory at Position plus the font's shared memory
//offset, after zeroing the 8 bytes that precede it.
public void MapFont(SharedFontType FontType, AMemory Memory, long Position)
{
    long FontAddress = Position + GetSharedMemoryAddressOffset(FontType);

    // TODO: find what are the 8 bytes before the font
    Memory.WriteUInt64(FontAddress - 8, 0);

    Memory.WriteBytes(FontAddress, FontData[FontType]);
}
//Writes the given font into every mapping currently listed in ShMemPositions.
//Each mapping is temporarily reprotected as read/write and then returned to its
//original permissions, even when the copy fails.
//Throws VmmPageFaultException when a mapping has no map info.
public void PropagateNewMapFont(SharedFontType Type)
{
    lock (ShMemLock)
    {
        foreach ((AMemory Memory, long Position, long Size) in ShMemPositions)
        {
            AMemoryMapInfo MemoryInfo = Memory.Manager.GetMapInfo(Position);

            if (MemoryInfo == null)
            {
                throw new VmmPageFaultException(Position);
            }

            // The memory is read only, we need to change that to add the new font
            AMemoryPerm OriginalPerms = MemoryInfo.Perm;

            Memory.Manager.Reprotect(Position, Size, AMemoryPerm.RW);

            try
            {
                MapFont(Type, Memory, Position);
            }
            finally
            {
                // Never leave the region writable if the copy throws.
                Memory.Manager.Reprotect(Position, Size, OriginalPerms);
            }
        }
    }
}
//Handler for a new mapping of the shared memory: refreshes ShMemPositions and
//copies every font whose load state is 1 into the last mapping in the list.
internal void ShMemMap(object sender, EventArgs e)
{
    HSharedMem SharedMem = (HSharedMem)sender;

    lock (ShMemLock)
    {
        ShMemPositions = SharedMem.GetVirtualPositions();

        //The last entry is the one written to.
        (AMemory, long, long) Newest = ShMemPositions[ShMemPositions.Length - 1];

        for (int FontIndex = 0; FontIndex < LoadedFonts.Length; FontIndex++)
        {
            if (LoadedFonts[FontIndex] != 1)
            {
                continue;
            }

            MapFont((SharedFontType)FontIndex, Newest.Item1, Newest.Item2);
        }
    }
}
//Handler for an unmap of the shared memory: refreshes ShMemPositions from the sender.
internal void ShMemUnmap(object sender, EventArgs e)
{
    HSharedMem Source = (HSharedMem)sender;

    lock (ShMemLock)
    {
        ShMemPositions = Source.GetVirtualPositions();
    }
}
//Marks a font as loaded by moving its state from 0 to 1; other states are left alone.
//NOTE(review): single-token statements are not visible in this view, so any
//additional call made inside the condition cannot be confirmed from here.
public void Load(SharedFontType FontType)
if (LoadedFonts[(int)FontType] == 0)
LoadedFonts[(int)FontType] = 1;
//Reports the load state of a font: 0 while its state is not 1, otherwise the stored state.
//NOTE(review): the statement belonging to the comment below is not visible in
//this view; presumably a load is triggered before returning 0 - confirm.
public uint GetLoadState(SharedFontType FontType)
if (LoadedFonts[(int)FontType] != 1)
// Some games don't request a load, so we need to load it here.
return 0;
return LoadedFonts[(int)FontType];
//Size in bytes of the loaded data for the given font.
public uint GetFontSize(SharedFontType FontType) => (uint)FontData[FontType].Length;
//Computes the offset of a font's data from the start of the shared memory region.
//The offset starts at 8 and grows with the size of every font type that comes
//before FontType in enum order.
public uint GetSharedMemoryAddressOffset(SharedFontType FontType)
uint Pos = 0x8;
//NOTE(review): braces are not visible in this view; presumably both additions
//below run per preceding font (its size plus 8 extra bytes) - confirm.
for (SharedFontType Type = SharedFontType.JapanUsEurope; Type < FontType; Type++)
Pos += GetFontSize(Type);
Pos += 0x8;
return Pos;
//Number of fonts held in FontData.
public int Count => FontData.Count;
@ -1,6 +1,6 @@
namespace Ryujinx.HLE.OsHle.Services.Pl
namespace Ryujinx.HLE.Font
enum SharedFontType
public enum SharedFontType
JapanUsEurope = 0,
SimplifiedChinese = 1,
@ -1,6 +1,7 @@
using Ryujinx.Graphics.Gal;
using Ryujinx.HLE.Gpu.Memory;
using Ryujinx.HLE.Gpu.Texture;
using System;
using System.Collections.Generic;
namespace Ryujinx.HLE.Gpu.Engines
@ -64,6 +65,8 @@ namespace Ryujinx.HLE.Gpu.Engines
bool SrcLinear = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0;
int SrcWidth = ReadRegister(NvGpuEngine2dReg.SrcWidth);
int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight);
int SrcPitch = ReadRegister(NvGpuEngine2dReg.SrcPitch);
int SrcBlkDim = ReadRegister(NvGpuEngine2dReg.SrcBlockDimensions);
bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0;
int DstWidth = ReadRegister(NvGpuEngine2dReg.DstWidth);
@ -71,73 +74,112 @@ namespace Ryujinx.HLE.Gpu.Engines
int DstPitch = ReadRegister(NvGpuEngine2dReg.DstPitch);
int DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions);
TextureSwizzle SrcSwizzle = SrcLinear
? TextureSwizzle.Pitch
: TextureSwizzle.BlockLinear;
TextureSwizzle DstSwizzle = DstLinear
? TextureSwizzle.Pitch
: TextureSwizzle.BlockLinear;
int SrcBlockHeight = 1 << ((SrcBlkDim >> 4) & 0xf);
int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf);
long Key = Vmm.GetPhysicalAddress(MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress));
long SrcAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress);
long DstAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.DstAddress);
bool IsFbTexture = Gpu.Engine3d.IsFrameBufferPosition(Key);
long SrcKey = Vmm.GetPhysicalAddress(SrcAddress);
long DstKey = Vmm.GetPhysicalAddress(DstAddress);
if (IsFbTexture && DstLinear)
bool IsSrcFb = Gpu.Engine3d.IsFrameBufferPosition(SrcKey);
bool IsDstFb = Gpu.Engine3d.IsFrameBufferPosition(DstKey);
TextureInfo SrcTexture()
DstSwizzle = TextureSwizzle.BlockLinear;
return new TextureInfo(
SrcBlockHeight, 1,
TextureInfo DstTexture = new TextureInfo(
TextureInfo DstTexture()
return new TextureInfo(
DstBlockHeight, 1,
if (IsFbTexture)
//TODO: fb -> fb copies, tex -> fb copies, formats other than RGBA8,
//make it throw for unimpl stuff (like the copy mode)...
if (IsSrcFb && IsDstFb)
//TODO: Change this when the correct frame buffer resolution is used.
//Currently, the frame buffer size is hardcoded to 1280x720.
SrcWidth = 1280;
SrcHeight = 720;
Gpu.Renderer.FrameBuffer.GetBufferData(Key, (byte[] Buffer) =>
//Frame Buffer -> Frame Buffer copy.
if (IsSrcFb)
//Frame Buffer -> Texture copy.
Gpu.Renderer.FrameBuffer.GetBufferData(SrcKey, (byte[] Buffer) =>
TextureInfo Src = SrcTexture();
TextureInfo Dst = DstTexture();
if (Src.Width != Dst.Width ||
Src.Height != Dst.Height)
throw new NotImplementedException("Texture resizing is not supported");
TextureWriter.Write(Vmm, Dst, Buffer);
else if (IsDstFb)
//Texture -> Frame Buffer copy.
const GalTextureFormat Format = GalTextureFormat.A8B8G8R8;
byte[] Buffer = TextureReader.Read(Vmm, SrcTexture());
long Size = SrcWidth * SrcHeight * 4;
//Texture -> Texture copy.
TextureInfo Src = SrcTexture();
TextureInfo Dst = DstTexture();
byte[] Buffer = Vmm.ReadBytes(SrcAddress, Size);
private void CopyTexture(
NvGpuVmm Vmm,
TextureInfo Texture,
byte[] Buffer,
int Width,
int Height)
if (Src.Width != Dst.Width ||
Src.Height != Dst.Height)
TextureWriter.Write(Vmm, Texture, Buffer, Width, Height);
throw new NotImplementedException("Texture resizing is not supported");
TextureWriter.Write(Vmm, Dst, TextureReader.Read(Vmm, Src));
private long MakeInt64From2xInt32(NvGpuEngine2dReg Reg)
@ -25,6 +25,8 @@ namespace Ryujinx.HLE.Gpu.Engines
private HashSet<long> FrameBuffers;
private List<long>[] UploadedKeys;
public NvGpuEngine3d(NvGpu Gpu)
this.Gpu = Gpu;
@ -57,6 +59,13 @@ namespace Ryujinx.HLE.Gpu.Engines
FrameBuffers = new HashSet<long>();
UploadedKeys = new List<long>[(int)NvGpuBufferType.Count];
for (int i = 0; i < UploadedKeys.Length; i++)
UploadedKeys[i] = new List<long>();
public void CallMethod(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
@ -132,10 +141,22 @@ namespace Ryujinx.HLE.Gpu.Engines
int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10);
int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10);
//Note: Using the Width/Height results seems to give incorrect results.
//Maybe the size of all frame buffers is hardcoded to screen size? This seems unlikely.
Gpu.Renderer.FrameBuffer.Create(Key, 1280, 720);
float TX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateX + FbIndex * 4);
float TY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateY + FbIndex * 4);
float SX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleX + FbIndex * 4);
float SY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleY + FbIndex * 4);
int VpX = (int)MathF.Max(0, TX - MathF.Abs(SX));
int VpY = (int)MathF.Max(0, TY - MathF.Abs(SY));
int VpW = (int)(TX + MathF.Abs(SX)) - VpX;
int VpH = (int)(TY + MathF.Abs(SY)) - VpY;
Gpu.Renderer.FrameBuffer.Create(Key, Width, Height);
Gpu.Renderer.FrameBuffer.SetViewport(VpX, VpY, VpW, VpH);
private long[] UploadShaders(NvGpuVmm Vmm)
@ -172,6 +193,8 @@ namespace Ryujinx.HLE.Gpu.Engines
for (; Index < 6; Index++)
GalShaderType Type = GetTypeFromProgram(Index);
int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + Index * 0x10);
int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + Index * 0x10);
@ -180,21 +203,21 @@ namespace Ryujinx.HLE.Gpu.Engines
if (!Enable)
long Key = BasePosition + (uint)Offset;
GalShaderType ShaderType = GetTypeFromProgram(Index);
Keys[(int)Type] = Key;
Keys[(int)ShaderType] = Key;
Gpu.Renderer.Shader.Create(Vmm, Key, ShaderType);
Gpu.Renderer.Shader.Create(Vmm, Key, Type);
float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportScaleX);
float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportScaleY);
float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX);
float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY);
Gpu.Renderer.Shader.SetFlip(SignX, SignY);
@ -218,8 +241,8 @@ namespace Ryujinx.HLE.Gpu.Engines
private void SetFrontFace()
float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportScaleX);
float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportScaleY);
float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX);
float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY);
GalFrontFace FrontFace = (GalFrontFace)ReadRegister(NvGpuEngine3dReg.FrontFace);
@ -502,7 +525,7 @@ namespace Ryujinx.HLE.Gpu.Engines
if (Gpu.Renderer.Texture.TryGetCachedTexture(Key, Size, out GalTexture Texture))
if (NewTexture.Equals(Texture) && !Vmm.IsRegionModified(Key, Size, NvGpuBufferType.Texture))
if (NewTexture.Equals(Texture) && !QueryKeyUpload(Vmm, Key, Size, NvGpuBufferType.Texture))
Gpu.Renderer.Texture.Bind(Key, TexIndex);
@ -546,9 +569,9 @@ namespace Ryujinx.HLE.Gpu.Engines
if (Cb.Enabled)
byte[] Data = Vmm.ReadBytes(Cb.Position, (uint)Cb.Size);
IntPtr DataAddress = Vmm.GetHostAddress(Cb.Position, Cb.Size);
Gpu.Renderer.Shader.SetConstBuffer(BasePosition + (uint)Offset, Cbuf, Data);
Gpu.Renderer.Shader.SetConstBuffer(BasePosition + (uint)Offset, Cbuf, Cb.Size, DataAddress);
@ -579,11 +602,11 @@ namespace Ryujinx.HLE.Gpu.Engines
bool IboCached = Gpu.Renderer.Rasterizer.IsIboCached(IboKey, (uint)IbSize);
if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index))
if (!IboCached || QueryKeyUpload(Vmm, IboKey, (uint)IbSize, NvGpuBufferType.Index))
byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize);
IntPtr DataAddress = Vmm.GetHostAddress(IndexPosition, IbSize);
Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data);
Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, DataAddress);
Gpu.Renderer.Rasterizer.SetIndexArray(IbSize, IndexFormat);
@ -643,11 +666,11 @@ namespace Ryujinx.HLE.Gpu.Engines
bool VboCached = Gpu.Renderer.Rasterizer.IsVboCached(VboKey, VbSize);
if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex))
if (!VboCached || QueryKeyUpload(Vmm, VboKey, VbSize, NvGpuBufferType.Vertex))
byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize);
IntPtr DataAddress = Vmm.GetHostAddress(VertexPosition, VbSize);
Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data);
Gpu.Renderer.Rasterizer.CreateVbo(VboKey, (int)VbSize, DataAddress);
Gpu.Renderer.Rasterizer.SetVertexArray(Stride, VboKey, Attribs[Index].ToArray());
@ -678,6 +701,11 @@ namespace Ryujinx.HLE.Gpu.Engines
if (Mode == 0)
foreach (List<long> Uploaded in UploadedKeys)
//Write mode.
Vmm.WriteInt32(Position, Seq);
@ -760,5 +788,19 @@ namespace Ryujinx.HLE.Gpu.Engines
return FrameBuffers.Contains(Position);
private bool QueryKeyUpload(NvGpuVmm Vmm, long Key, long Size, NvGpuBufferType Type)
List<long> Uploaded = UploadedKeys[(int)Type];
if (Uploaded.Contains(Key))
return false;
return Vmm.IsRegionModified(Key, Size, Type);
@ -6,12 +6,14 @@ namespace Ryujinx.HLE.Gpu.Engines
FrameBufferNWidth = 0x202,
FrameBufferNHeight = 0x203,
FrameBufferNFormat = 0x204,
ViewportScaleX = 0x280,
ViewportScaleY = 0x281,
ViewportScaleZ = 0x282,
ViewportTranslateX = 0x283,
ViewportTranslateY = 0x284,
ViewportTranslateZ = 0x285,
ViewportNScaleX = 0x280,
ViewportNScaleY = 0x281,
ViewportNScaleZ = 0x282,
ViewportNTranslateX = 0x283,
ViewportNTranslateY = 0x284,
ViewportNTranslateZ = 0x285,
ViewportNHoriz = 0x300,
ViewportNVert = 0x301,
VertexArrayFirst = 0x35d,
VertexArrayCount = 0x35e,
ClearDepth = 0x364,
@ -1,5 +1,6 @@
using ChocolArm64.Memory;
using Ryujinx.Graphics.Gal;
using System;
using System.Collections.Concurrent;
namespace Ryujinx.HLE.Gpu.Memory
@ -279,6 +280,11 @@ namespace Ryujinx.HLE.Gpu.Memory
return Cache.IsRegionModified(Memory, BufferType, PA, Size);
public IntPtr GetHostAddress(long Position, long Size)
return Memory.GetHostAddress(GetPhysicalAddress(Position), Size);
public byte ReadByte(long Position)
Position = GetPhysicalAddress(Position);
@ -25,6 +25,8 @@ namespace Ryujinx.HLE.Gpu.Memory
private List<Range>[] Regions;
private HashSet<long> ResidencyKeys;
public LinkedListNode<long> Node { get; set; }
public int Timestamp { get; private set; }
@ -37,6 +39,27 @@ namespace Ryujinx.HLE.Gpu.Memory
Regions[Index] = new List<Range>();
ResidencyKeys = new HashSet<long>();
public void AddResidency(long Key)
public void RemoveResidency(HashSet<long>[] Residency, long PageSize)
for (int i = 0; i < (int)NvGpuBufferType.Count; i++)
foreach (Range Region in Regions[i])
foreach (long Key in ResidencyKeys)
Residency[Region.Start / PageSize].Remove(Key);
public bool AddRange(long Start, long End, NvGpuBufferType BufferType)
@ -89,6 +112,10 @@ namespace Ryujinx.HLE.Gpu.Memory
private LinkedList<long> SortedCache;
private HashSet<long>[] Residency;
private long ResidencyPageSize;
private int CpCount;
public NvGpuVmmCache()
@ -100,7 +127,7 @@ namespace Ryujinx.HLE.Gpu.Memory
public bool IsRegionModified(AMemory Memory, NvGpuBufferType BufferType, long PA, long Size)
bool[] Modified = Memory.IsRegionModified(PA, Size);
(bool[] Modified, long ModifiedCount) = Memory.IsRegionModified(PA, Size);
if (Modified == null)
@ -111,8 +138,19 @@ namespace Ryujinx.HLE.Gpu.Memory
long PageSize = Memory.GetHostPageSize();
bool HasResidents = AddResidency(PA, Size);
if (!HasResidents && ModifiedCount == 0)
return false;
long Mask = PageSize - 1;
long ResidencyKey = PA;
long PAEnd = PA + Size;
bool RegMod = false;
@ -147,6 +185,8 @@ namespace Ryujinx.HLE.Gpu.Memory
Cache[Key] = Cp;
Cp.Node = SortedCache.AddLast(Key);
RegMod |= Cp.AddRange(PA, PAPgEnd, BufferType);
@ -159,6 +199,53 @@ namespace Ryujinx.HLE.Gpu.Memory
return RegMod;
private bool AddResidency(long PA, long Size)
long PageSize = ResidencyPageSize;
long Mask = PageSize - 1;
long Key = PA;
bool ResidentFound = false;
for (long Cursor = PA & ~Mask; Cursor < ((PA + Size + PageSize - 1) & ~Mask); Cursor += PageSize)
long PageIndex = Cursor / PageSize;
if (Residency[PageIndex].Count > 1)
ResidentFound = true;
return ResidentFound;
private void EnsureResidencyInitialized(long PageSize)
if (Residency == null)
Residency = new HashSet<long>[AMemoryMgr.RamSize / PageSize];
for (int i = 0; i < Residency.Length; i++)
Residency[i] = new HashSet<long>();
ResidencyPageSize = PageSize;
if (ResidencyPageSize != PageSize)
throw new InvalidOperationException("Tried to change residency page size");
private void ClearCachedPagesIfNeeded()
if (CpCount <= MaxCpCount)
@ -179,6 +266,8 @@ namespace Ryujinx.HLE.Gpu.Memory
CachedPage Cp = Cache[Key];
Cp.RemoveResidency(Residency, ResidencyPageSize);
CpCount -= Cp.GetTotalCount();
@ -56,8 +56,10 @@ namespace Ryujinx.HLE.Gpu.Texture
int Pitch = (Tic[3] & 0xffff) << 5;
int BlockHeightLog2 = (Tic[3] >> 3) & 7;
int TileWidthLog2 = (Tic[3] >> 10) & 7;
int BlockHeight = 1 << BlockHeightLog2;
int TileWidth = 1 << TileWidthLog2;
int Width = (Tic[4] & 0xffff) + 1;
int Height = (Tic[5] & 0xffff) + 1;
@ -68,6 +70,7 @@ namespace Ryujinx.HLE.Gpu.Texture
@ -7,8 +7,14 @@ namespace Ryujinx.HLE.Gpu.Texture
static class TextureHelper
public static ISwizzle GetSwizzle(TextureInfo Texture, int Width, int Bpp)
public static ISwizzle GetSwizzle(TextureInfo Texture, int BlockWidth, int Bpp)
int Width = (Texture.Width + (BlockWidth - 1)) / BlockWidth;
int AlignMask = Texture.TileWidth * (64 / Bpp) - 1;
Width = (Width + AlignMask) & ~AlignMask;
switch (Texture.Swizzle)
case TextureSwizzle._1dBuffer:
@ -35,9 +41,12 @@ namespace Ryujinx.HLE.Gpu.Texture
return Texture.Width * Texture.Height * 8;
case GalTextureFormat.A8B8G8R8:
case GalTextureFormat.A2B10G10R10:
case GalTextureFormat.R32:
case GalTextureFormat.R16_G16:
case GalTextureFormat.ZF32:
case GalTextureFormat.BF10GF11RF11:
case GalTextureFormat.Z24S8:
return Texture.Width * Texture.Height * 4;
case GalTextureFormat.A1B5G5R5:
@ -55,6 +64,8 @@ namespace Ryujinx.HLE.Gpu.Texture
return CompressedTextureSize(Texture.Width, Texture.Height, 4, 4, 8);
case GalTextureFormat.BC6H_SF16:
case GalTextureFormat.BC6H_UF16:
case GalTextureFormat.BC7U:
case GalTextureFormat.BC2:
case GalTextureFormat.BC3:
@ -11,6 +11,7 @@ namespace Ryujinx.HLE.Gpu.Texture
public int Pitch { get; private set; }
public int BlockHeight { get; private set; }
public int TileWidth { get; private set; }
public TextureSwizzle Swizzle { get; private set; }
@ -29,6 +30,8 @@ namespace Ryujinx.HLE.Gpu.Texture
BlockHeight = 16;
TileWidth = 1;
Swizzle = TextureSwizzle.BlockLinear;
Format = GalTextureFormat.A8B8G8R8;
@ -40,6 +43,7 @@ namespace Ryujinx.HLE.Gpu.Texture
int Height,
int Pitch,
int BlockHeight,
int TileWidth,
TextureSwizzle Swizzle,
GalTextureFormat Format)
@ -48,6 +52,7 @@ namespace Ryujinx.HLE.Gpu.Texture
this.Height = Height;
this.Pitch = Pitch;
this.BlockHeight = BlockHeight;
this.TileWidth = TileWidth;
this.Swizzle = Swizzle;
this.Format = Format;
@ -13,13 +13,18 @@ namespace Ryujinx.HLE.Gpu.Texture
case GalTextureFormat.R32G32B32A32: return Read16Bpp (Memory, Texture);
case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture);
case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture);
case GalTextureFormat.A2B10G10R10: return Read4Bpp (Memory, Texture);
case GalTextureFormat.R16_G16: return Read4Bpp (Memory, Texture);
case GalTextureFormat.R32: return Read4Bpp (Memory, Texture);
case GalTextureFormat.BF10GF11RF11: return Read4Bpp (Memory, Texture);
case GalTextureFormat.Z24S8: return Read4Bpp (Memory, Texture);
case GalTextureFormat.A1B5G5R5: return Read5551 (Memory, Texture);
case GalTextureFormat.B5G6R5: return Read565 (Memory, Texture);
case GalTextureFormat.G8R8: return Read2Bpp (Memory, Texture);
case GalTextureFormat.R16: return Read2Bpp (Memory, Texture);
case GalTextureFormat.R8: return Read1Bpp (Memory, Texture);
case GalTextureFormat.BC6H_SF16: return Read16BptCompressedTexture(Memory, Texture, 4, 4);
case GalTextureFormat.BC6H_UF16: return Read16BptCompressedTexture(Memory, Texture, 4, 4);
case GalTextureFormat.BC7U: return Read16BptCompressedTexture(Memory, Texture, 4, 4);
case GalTextureFormat.BC1: return Read8Bpt4x4 (Memory, Texture);
case GalTextureFormat.BC2: return Read16BptCompressedTexture(Memory, Texture, 4, 4);
@ -53,7 +58,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 1);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 1);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -86,7 +91,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 2];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 2);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 2);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -124,7 +129,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 2];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 2);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 2);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -161,7 +166,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 2];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 2);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 2);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -194,7 +199,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 4];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 4);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 4);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -227,7 +232,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 8];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 8);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 8);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -260,7 +265,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 16];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 16);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 16);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -295,7 +300,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 8];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 8);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 4, 8);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -328,7 +333,7 @@ namespace Ryujinx.HLE.Gpu.Texture
byte[] Output = new byte[Width * Height * 16];
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 16);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, BlockWidth, 16);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -6,29 +6,9 @@ namespace Ryujinx.HLE.Gpu.Texture
static class TextureWriter
public static void Write(
IAMemory Memory,
TextureInfo Texture,
byte[] Data,
int Width,
int Height)
public unsafe static void Write(IAMemory Memory, TextureInfo Texture, byte[] Data)
switch (Texture.Format)
case GalTextureFormat.A8B8G8R8: Write4Bpp(Memory, Texture, Data, Width, Height); break;
default: throw new NotImplementedException(Texture.Format.ToString());
private unsafe static void Write4Bpp(
IAMemory Memory,
TextureInfo Texture,
byte[] Data,
int Width,
int Height)
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 4);
ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 4);
(AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition(
@ -38,8 +18,8 @@ namespace Ryujinx.HLE.Gpu.Texture
long InOffs = 0;
for (int Y = 0; Y < Height; Y++)
for (int X = 0; X < Width; X++)
for (int Y = 0; Y < Texture.Height; Y++)
for (int X = 0; X < Texture.Width; X++)
long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y);
@ -67,7 +67,7 @@ namespace Ryujinx.HLE.Input
private object ShMemLock;
private (AMemory, long)[] ShMemPositions;
private (AMemory, long, long)[] ShMemPositions;
public Hid(Logger Log)
@ -75,7 +75,7 @@ namespace Ryujinx.HLE.Input
ShMemLock = new object();
ShMemPositions = new (AMemory, long)[0];
ShMemPositions = new (AMemory, long, long)[0];
internal void ShMemMap(object sender, EventArgs e)
@ -86,7 +86,7 @@ namespace Ryujinx.HLE.Input
ShMemPositions = SharedMem.GetVirtualPositions();
(AMemory Memory, long Position) = ShMemPositions[ShMemPositions.Length - 1];
(AMemory Memory, long Position, long Size) = ShMemPositions[ShMemPositions.Length - 1];
for (long Offset = 0; Offset < Horizon.HidSize; Offset += 8)
@ -167,7 +167,7 @@ namespace Ryujinx.HLE.Input
lock (ShMemLock)
foreach ((AMemory Memory, long Position) in ShMemPositions)
foreach ((AMemory Memory, long Position, long Size) in ShMemPositions)
long ControllerOffset = Position + HidControllersOffset;
@ -218,7 +218,7 @@ namespace Ryujinx.HLE.Input
lock (ShMemLock)
foreach ((AMemory Memory, long Position) in ShMemPositions)
foreach ((AMemory Memory, long Position, long Size) in ShMemPositions)
long TouchScreenOffset = Position + HidTouchScreenOffset;
@ -2,6 +2,7 @@ using ChocolArm64.Memory;
using Ryujinx.HLE.Loaders.Executables;
using Ryujinx.HLE.OsHle;
using System.Collections.Generic;
using System.IO;
namespace Ryujinx.HLE.Loaders
@ -15,6 +16,8 @@ namespace Ryujinx.HLE.Loaders
public string Name { get; private set; }
public string FilePath { get; private set; }
private AMemory Memory;
public long ImageBase { get; private set; }
@ -26,7 +29,12 @@ namespace Ryujinx.HLE.Loaders
m_SymbolTable = new Dictionary<long, string>();
Name = Exe.Name;
FilePath = Exe.FilePath;
if (FilePath != null)
Name = Path.GetFileNameWithoutExtension(FilePath.Replace(Homebrew.TemporaryNroSuffix, ""));
this.Memory = Memory;
this.ImageBase = ImageBase;
@ -2,7 +2,7 @@ namespace Ryujinx.HLE.Loaders.Executables
public interface IExecutable
string Name { get; }
string FilePath { get; }
byte[] Text { get; }
byte[] RO { get; }
@ -4,7 +4,7 @@ namespace Ryujinx.HLE.Loaders.Executables
class Nro : IExecutable
public string Name { get; private set; }
public string FilePath { get; private set; }
public byte[] Text { get; private set; }
public byte[] RO { get; private set; }
@ -16,9 +16,9 @@ namespace Ryujinx.HLE.Loaders.Executables
public int DataOffset { get; private set; }
public int BssSize { get; private set; }
public Nro(Stream Input, string Name)
public Nro(Stream Input, string FilePath)
this.Name = Name;
this.FilePath = FilePath;
BinaryReader Reader = new BinaryReader(Input);
@ -6,7 +6,7 @@ namespace Ryujinx.HLE.Loaders.Executables
class Nso : IExecutable
public string Name { get; private set; }
public string FilePath { get; private set; }
public byte[] Text { get; private set; }
public byte[] RO { get; private set; }
@ -29,9 +29,9 @@ namespace Ryujinx.HLE.Loaders.Executables
HasDataHash = 1 << 5
public Nso(Stream Input, string Name)
public Nso(Stream Input, string FilePath)
this.Name = Name;
this.FilePath = FilePath;
BinaryReader Reader = new BinaryReader(Input);
@ -4,6 +4,7 @@ namespace Ryujinx.HLE.Logging
@ -6,37 +6,37 @@ namespace Ryujinx.HLE.OsHle.Handles
class HSharedMem
private List<(AMemory, long)> Positions;
private List<(AMemory, long, long)> Positions;
public EventHandler<EventArgs> MemoryMapped;
public EventHandler<EventArgs> MemoryUnmapped;
public HSharedMem()
Positions = new List<(AMemory, long)>();
Positions = new List<(AMemory, long, long)>();
public void AddVirtualPosition(AMemory Memory, long Position)
public void AddVirtualPosition(AMemory Memory, long Position, long Size)
lock (Positions)
Positions.Add((Memory, Position));
Positions.Add((Memory, Position, Size));
MemoryMapped?.Invoke(this, EventArgs.Empty);
public void RemoveVirtualPosition(AMemory Memory, long Position)
public void RemoveVirtualPosition(AMemory Memory, long Position, long Size)
lock (Positions)
Positions.Remove((Memory, Position));
Positions.Remove((Memory, Position, Size));
MemoryUnmapped?.Invoke(this, EventArgs.Empty);
public (AMemory, long)[] GetVirtualPositions()
public (AMemory, long, long)[] GetVirtualPositions()
return Positions.ToArray();
@ -11,8 +11,10 @@ namespace Ryujinx.HLE.OsHle.Handles
public long MutexAddress { get; set; }
public long CondVarAddress { get; set; }
public long ArbiterWaitAddress { get; set; }
public bool CondVarSignaled { get; set; }
public bool ArbiterSignaled { get; set; }
private Process Process;
@ -1,11 +1,14 @@
using ChocolArm64.Memory;
using System.Text;
namespace Ryujinx.HLE.OsHle
static class Homebrew
public const string TemporaryNroSuffix = ".ryu_tmp.nro";
public static void WriteHbAbiData(AMemory Memory, long Position, int MainThreadHandle)
public static void WriteHbAbiData(AMemory Memory, long Position, int MainThreadHandle, string SwitchPath)
Memory.Manager.Map(Position, AMemoryMgr.PageSize, (int)MemoryType.Normal, AMemoryPerm.RW);
@ -15,6 +18,11 @@ namespace Ryujinx.HLE.OsHle
WriteConfigEntry(Memory, ref Position, 2, 0, Position + 0x200, Position + 0x400);
// Argv
long ArgvPosition = Position + 0xC00;
WriteConfigEntry(Memory, ref Position, 5, 0, 0, ArgvPosition);
Memory.WriteBytes(ArgvPosition, Encoding.ASCII.GetBytes(SwitchPath + "\0"));
WriteConfigEntry(Memory, ref Position, 7);
@ -1,4 +1,5 @@
using Ryujinx.HLE.Loaders.Executables;
using Ryujinx.HLE.Loaders.Npdm;
using Ryujinx.HLE.Logging;
using Ryujinx.HLE.OsHle.Handles;
using System;
@ -76,6 +77,25 @@ namespace Ryujinx.HLE.OsHle
void LoadNpdm(string FileName)
string File = Directory.GetFiles(ExeFsDir, FileName)[0];
Ns.Log.PrintInfo(LogClass.Loader, "Loading Title Metadata...");
using (FileStream Input = new FileStream(File, FileMode.Open))
MainProcess.Metadata = new Npdm(Input);
if (!MainProcess.Metadata.Is64Bits)
throw new NotImplementedException("32-bit titles are unsupported!");
@ -87,19 +107,35 @@ namespace Ryujinx.HLE.OsHle
public void LoadProgram(string FileName)
public void LoadProgram(string FilePath)
bool IsNro = Path.GetExtension(FileName).ToLower() == ".nro";
bool IsNro = Path.GetExtension(FilePath).ToLower() == ".nro";
string Name = Path.GetFileNameWithoutExtension(FileName);
string Name = Path.GetFileNameWithoutExtension(FilePath);
string SwitchFilePath = Ns.VFs.SystemPathToSwitchPath(FilePath);
if (IsNro && (SwitchFilePath == null || !SwitchFilePath.StartsWith("sdmc:/")))
string SwitchPath = $"sdmc:/switch/{Name}{Homebrew.TemporaryNroSuffix}";
string TempPath = Ns.VFs.SwitchPathToSystemPath(SwitchPath);
string SwitchDir = Path.GetDirectoryName(TempPath);
if (!Directory.Exists(SwitchDir))
File.Copy(FilePath, TempPath, true);
FilePath = TempPath;
Process MainProcess = MakeProcess();
using (FileStream Input = new FileStream(FileName, FileMode.Open))
using (FileStream Input = new FileStream(FilePath, FileMode.Open))
? (IExecutable)new Nro(Input, Name)
: (IExecutable)new Nso(Input, Name));
? (IExecutable)new Nro(Input, FilePath)
: (IExecutable)new Nso(Input, FilePath));
@ -47,13 +47,15 @@ namespace Ryujinx.HLE.OsHle.Ipc
HasPId = true;
public static IpcHandleDesc MakeCopy(int Handle) => new IpcHandleDesc(
new int[] { Handle },
new int[0]);
public static IpcHandleDesc MakeCopy(params int[] Handles)
return new IpcHandleDesc(Handles, new int[0]);
public static IpcHandleDesc MakeMove(int Handle) => new IpcHandleDesc(
new int[0],
new int[] { Handle });
public static IpcHandleDesc MakeMove(params int[] Handles)
return new IpcHandleDesc(new int[0], Handles);
public byte[] GetBytes()
@ -21,7 +21,8 @@ namespace Ryujinx.HLE.OsHle.Ipc
BinaryReader ReqReader = new BinaryReader(Raw);
if (Request.Type == IpcMessageType.Request)
if (Request.Type == IpcMessageType.Request ||
Request.Type == IpcMessageType.RequestWithContext)
Response.Type = IpcMessageType.Response;
@ -44,7 +45,8 @@ namespace Ryujinx.HLE.OsHle.Ipc
Response.RawData = ResMS.ToArray();
else if (Request.Type == IpcMessageType.Control)
else if (Request.Type == IpcMessageType.Control ||
Request.Type == IpcMessageType.ControlWithContext)
long Magic = ReqReader.ReadInt64();
long CmdId = ReqReader.ReadInt64();
@ -15,7 +15,7 @@ namespace Ryujinx.HLE.OsHle.Ipc
public List<IpcBuffDesc> ExchangeBuff { get; private set; }
public List<IpcRecvListBuffDesc> RecvListBuff { get; private set; }
public List<int> ResponseObjIds { get; private set; }
public List<int> ObjectIds { get; private set; }
public byte[] RawData { get; set; }
@ -27,7 +27,7 @@ namespace Ryujinx.HLE.OsHle.Ipc
ExchangeBuff = new List<IpcBuffDesc>();
RecvListBuff = new List<IpcRecvListBuffDesc>();
ResponseObjIds = new List<int>();
ObjectIds = new List<int>();
public IpcMessage(byte[] Data, long CmdPtr) : this()
@ -5,6 +5,8 @@ namespace Ryujinx.HLE.OsHle.Ipc
Response = 0,
CloseSession = 2,
Request = 4,
Control = 5
Control = 5,
RequestWithContext = 6,
ControlWithContext = 7
Normal file
Normal file
@ -0,0 +1,112 @@
using ChocolArm64.Memory;
using ChocolArm64.State;
using Ryujinx.HLE.OsHle.Handles;
using static Ryujinx.HLE.OsHle.ErrorCode;
namespace Ryujinx.HLE.OsHle.Kernel
//Wait-side helpers for the address arbiter; SvcWaitForAddress dispatches to the
//public methods below according to the requested ArbitrationType.
//NOTE(review): brace-only lines and some statements are not visible in this listing,
//so the comments below describe only what the visible lines show.
static class AddressArbiter
//Records Address as the current thread's arbiter wait address, clears its signaled
//flag and enters a scheduler wait using Timeout converted by NsTimeConverter.GetTimeMs.
//Returns a Kernel/Timeout error when the thread is still not flagged as signaled
//after EnterWait returns, and 0 otherwise.
static ulong WaitForAddress(Process Process, AThreadState ThreadState, long Address, ulong Timeout)
KThread CurrentThread = Process.GetThread(ThreadState.Tpidr);
CurrentThread.ArbiterWaitAddress = Address;
CurrentThread.ArbiterSignaled = false;
Process.Scheduler.EnterWait(CurrentThread, NsTimeConverter.GetTimeMs(Timeout));
if (!CurrentThread.ArbiterSignaled)
return MakeError(ErrorModule.Kernel, KernelErr.Timeout);
return 0;
//Reads the 32-bit value at Address using the SetExclusive/TestExclusive pair and
//compares it against Value; when ShouldDecrement is set the value minus one is
//written back. A Timeout of 0 yields a Kernel/Timeout error without waiting;
//otherwise the call ends in WaitForAddress.
//NOTE(review): presumably InvalidState is returned when the value is NOT less than
//Value (the else branch is not visible here) - confirm against the full file.
public static ulong WaitForAddressIfLessThan(Process Process,
AThreadState ThreadState,
AMemory Memory,
long Address,
int Value,
ulong Timeout,
bool ShouldDecrement)
Memory.SetExclusive(ThreadState, Address);
int CurrentValue = Memory.ReadInt32(Address);
while (true)
if (Memory.TestExclusive(ThreadState, Address))
if (CurrentValue < Value)
if (ShouldDecrement)
Memory.WriteInt32(Address, CurrentValue - 1);
return MakeError(ErrorModule.Kernel, KernelErr.InvalidState);
//Exclusive test did not succeed: re-arm the monitor and re-read the value.
Memory.SetExclusive(ThreadState, Address);
CurrentValue = Memory.ReadInt32(Address);
if (Timeout == 0)
return MakeError(ErrorModule.Kernel, KernelErr.Timeout);
return WaitForAddress(Process, ThreadState, Address, Timeout);
//Returns Kernel/InvalidState when the 32-bit value at Address differs from Value,
//Kernel/Timeout when Timeout is 0, and otherwise the result of WaitForAddress.
public static ulong WaitForAddressIfEqual(Process Process,
AThreadState ThreadState,
AMemory Memory,
long Address,
int Value,
ulong Timeout)
if (Memory.ReadInt32(Address) != Value)
return MakeError(ErrorModule.Kernel, KernelErr.InvalidState);
if (Timeout == 0)
return MakeError(ErrorModule.Kernel, KernelErr.Timeout);
return WaitForAddress(Process, ThreadState, Address, Timeout);
enum ArbitrationType : int
enum SignalType : int
@ -12,7 +12,7 @@ namespace Ryujinx.HLE.OsHle.Kernel
public const int Timeout = 117;
public const int Canceled = 118;
public const int CountOutOfRange = 119;
public const int InvalidInfo = 120;
public const int InvalidEnumValue = 120;
public const int InvalidThread = 122;
public const int InvalidState = 125;
@ -22,7 +22,7 @@ namespace Ryujinx.HLE.OsHle.Kernel
private ConcurrentDictionary<KThread, AutoResetEvent> SyncWaits;
private HashSet<(HSharedMem, long)> MappedSharedMems;
private HashSet<(HSharedMem, long, long)> MappedSharedMems;
private ulong CurrentHeapSize;
@ -73,7 +73,8 @@ namespace Ryujinx.HLE.OsHle.Kernel
{ 0x2c, SvcMapPhysicalMemory },
{ 0x2d, SvcUnmapPhysicalMemory },
{ 0x32, SvcSetThreadActivity },
{ 0x33, SvcGetThreadContext3 }
{ 0x33, SvcGetThreadContext3 },
{ 0x34, SvcWaitForAddress }
this.Ns = Ns;
@ -82,7 +83,7 @@ namespace Ryujinx.HLE.OsHle.Kernel
SyncWaits = new ConcurrentDictionary<KThread, AutoResetEvent>();
MappedSharedMems = new HashSet<(HSharedMem, long)>();
MappedSharedMems = new HashSet<(HSharedMem, long, long)>();
static SvcHandler()
@ -137,9 +138,9 @@ namespace Ryujinx.HLE.OsHle.Kernel
lock (MappedSharedMems)
foreach ((HSharedMem SharedMem, long Position) in MappedSharedMems)
foreach ((HSharedMem SharedMem, long Position, long Size) in MappedSharedMems)
SharedMem.RemoveVirtualPosition(Memory, Position);
SharedMem.RemoveVirtualPosition(Memory, Position, Size);
@ -174,15 +174,15 @@ namespace Ryujinx.HLE.OsHle.Kernel
AMemoryHelper.FillWithZeros(Memory, Src, (int)Size);
SharedMem.AddVirtualPosition(Memory, Src, Size);
Memory.Manager.Reprotect(Src, Size, (AMemoryPerm)Perm);
lock (MappedSharedMems)
MappedSharedMems.Add((SharedMem, Src));
MappedSharedMems.Add((SharedMem, Src, Size));
SharedMem.AddVirtualPosition(Memory, Src);
ThreadState.X0 = 0;
@ -210,11 +210,11 @@ namespace Ryujinx.HLE.OsHle.Kernel
Memory.Manager.Unmap(Src, Size, (int)MemoryType.SharedMemory);
SharedMem.RemoveVirtualPosition(Memory, Src);
SharedMem.RemoveVirtualPosition(Memory, Src, Size);
lock (MappedSharedMems)
MappedSharedMems.Remove((SharedMem, Src));
MappedSharedMems.Remove((SharedMem, Src, Size));
ThreadState.X0 = 0;
@ -242,7 +242,6 @@ namespace Ryujinx.HLE.OsHle.Kernel
IpcMessage Cmd = new IpcMessage(CmdData, CmdPtr);
long Result = IpcHandler.IpcCall(Ns, Process, Memory, Session, Cmd, CmdPtr);
@ -294,7 +293,7 @@ namespace Ryujinx.HLE.OsHle.Kernel
InfoType == 19 ||
InfoType == 20)
ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidInfo);
ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidEnumValue);
@ -87,7 +87,7 @@ namespace Ryujinx.HLE.OsHle.Kernel
KThread CurrThread = Process.GetThread(ThreadState.Tpidr);
if (TimeoutNs == 0)
if (TimeoutNs == 0 || TimeoutNs == ulong.MaxValue)
@ -197,6 +197,57 @@ namespace Ryujinx.HLE.OsHle.Kernel
private void SvcWaitForAddress(AThreadState ThreadState)
long Address = (long)ThreadState.X0;
ArbitrationType Type = (ArbitrationType)ThreadState.X1;
int Value = (int)ThreadState.X2;
ulong Timeout = ThreadState.X3;
"Address = " + Address.ToString("x16") + ", " +
"ArbitrationType = " + Type .ToString() + ", " +
"Value = " + Value .ToString("x8") + ", " +
"Timeout = " + Timeout.ToString("x16"));
if (IsPointingInsideKernel(Address))
Ns.Log.PrintWarning(LogClass.KernelSvc, $"Invalid address 0x{Address:x16}!");
ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidAddress);
if (IsWordAddressUnaligned(Address))
Ns.Log.PrintWarning(LogClass.KernelSvc, $"Unaligned address 0x{Address:x16}!");
ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidAlignment);
switch (Type)
case ArbitrationType.WaitIfLessThan:
ThreadState.X0 = AddressArbiter.WaitForAddressIfLessThan(Process, ThreadState, Memory, Address, Value, Timeout, false);
case ArbitrationType.DecrementAndWaitIfLessThan:
ThreadState.X0 = AddressArbiter.WaitForAddressIfLessThan(Process, ThreadState, Memory, Address, Value, Timeout, true);
case ArbitrationType.WaitIfEqual:
ThreadState.X0 = AddressArbiter.WaitForAddressIfEqual(Process, ThreadState, Memory, Address, Value, Timeout);
ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidEnumValue);
private void MutexUnlock(KThread CurrThread, long MutexAddress)
lock (Process.ThreadSyncLock)
@ -4,6 +4,7 @@ using ChocolArm64.Memory;
using ChocolArm64.State;
using Ryujinx.HLE.Loaders;
using Ryujinx.HLE.Loaders.Executables;
using Ryujinx.HLE.Loaders.Npdm;
using Ryujinx.HLE.Logging;
using Ryujinx.HLE.OsHle.Diagnostics;
using Ryujinx.HLE.OsHle.Exceptions;
@ -13,6 +14,7 @@ using Ryujinx.HLE.OsHle.Services.Nv;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace Ryujinx.HLE.OsHle
@ -47,6 +49,8 @@ namespace Ryujinx.HLE.OsHle
public AppletStateMgr AppletState { get; private set; }
public Npdm Metadata { get; set; }
private SvcHandler SvcHandler;
private ConcurrentDictionary<int, AThread> TlsSlots;
@ -155,7 +159,9 @@ namespace Ryujinx.HLE.OsHle
HbAbiDataPosition = AMemoryHelper.PageRoundUp(Executables[0].ImageEnd);
Homebrew.WriteHbAbiData(Memory, HbAbiDataPosition, Handle);
string SwitchPath = Ns.VFs.SystemPathToSwitchPath(Executables[0].FilePath);
Homebrew.WriteHbAbiData(Memory, HbAbiDataPosition, Handle, SwitchPath);
MainThread.Thread.ThreadState.X0 = (ulong)HbAbiDataPosition;
MainThread.Thread.ThreadState.X1 = ulong.MaxValue;
@ -423,6 +429,11 @@ namespace Ryujinx.HLE.OsHle
if (NeedsHbAbi && Executables.Count > 0 && Executables[0].FilePath.EndsWith(Homebrew.TemporaryNroSuffix))
@ -17,7 +17,9 @@ namespace Ryujinx.HLE.OsHle.Services.Am
m_Commands = new Dictionary<int, ServiceProcessRequest>()
{ 0, Exit },
{ 1, LockExit },
{ 2, UnlockExit },
{ 9, GetLibraryAppletLaunchableEvent },
{ 10, SetScreenShotPermission },
{ 11, SetOperationModeChangedNotification },
@ -31,8 +33,24 @@ namespace Ryujinx.HLE.OsHle.Services.Am
LaunchableEvent = new KEvent();
public long Exit(ServiceCtx Context)
Context.Ns.Log.PrintStub(LogClass.ServiceAm, "Stubbed.");
return 0;
public long LockExit(ServiceCtx Context)
Context.Ns.Log.PrintStub(LogClass.ServiceAm, "Stubbed.");
return 0;
public long UnlockExit(ServiceCtx Context)
Context.Ns.Log.PrintStub(LogClass.ServiceAm, "Stubbed.");
return 0;
@ -3,6 +3,7 @@ namespace Ryujinx.HLE.OsHle.Services.Aud
static class AudErr
public const int DeviceNotFound = 1;
public const int UnsupportedRevision = 2;
public const int UnsupportedSampleRate = 3;
@ -1,6 +1,6 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioOut
struct AudioOutData
@ -1,12 +1,11 @@
using ChocolArm64.Memory;
using Ryujinx.Audio;
using Ryujinx.HLE.Logging;
using Ryujinx.HLE.OsHle.Handles;
using Ryujinx.HLE.OsHle.Ipc;
using System;
using System.Collections.Generic;
namespace Ryujinx.HLE.OsHle.Services.Aud
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioOut
class IAudioOut : IpcService, IDisposable
@ -0,0 +1,8 @@
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
//Fixed output format used on the host side of the audio renderer.
static class AudioConsts
//Sample rate of the host output. VoiceContext resamples any voice
//whose SampleRate differs from this value.
public const int HostSampleRate = 48000;
//Number of interleaved channels on the host buffers. The mix buffer and
//the per-voice sample buffers are sized as SamplesCount * HostChannelsCount.
public const int HostChannelsCount = 2;
Normal file
Normal file
@ -0,0 +1,11 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
[StructLayout(LayoutKind.Sequential, Size = 0x10, Pack = 4)]
struct BehaviorIn
public long Unknown0;
public long Unknown8;
Normal file
Normal file
@ -0,0 +1,16 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
[StructLayout(LayoutKind.Sequential, Size = 0xc, Pack = 1)]
struct BiquadFilter
public byte Enable;
public byte Padding;
public short B0;
public short B1;
public short B2;
public short A1;
public short A2;
Normal file
Normal file
@ -0,0 +1,316 @@
using ChocolArm64.Memory;
using Ryujinx.Audio;
using Ryujinx.Audio.Adpcm;
using Ryujinx.HLE.Logging;
using Ryujinx.HLE.OsHle.Handles;
using Ryujinx.HLE.OsHle.Ipc;
using Ryujinx.HLE.OsHle.Utilities;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
class IAudioRenderer : IpcService, IDisposable
//This is the amount of samples that are going to be appended
//each time that RequestUpdateAudioRenderer is called. Ideally,
//this value should be neither too small (to avoid the player
//starving due to running out of samples) nor too large (to avoid
//high latency).
private const int MixBufferSamplesCount = 960;
private Dictionary<int, ServiceProcessRequest> m_Commands;
public override IReadOnlyDictionary<int, ServiceProcessRequest> Commands => m_Commands;
private KEvent UpdateEvent;
private AMemory Memory;
private IAalOutput AudioOut;
private AudioRendererParameter Params;
private MemoryPoolContext[] MemoryPools;
private VoiceContext[] Voices;
private int Track;
public IAudioRenderer(AMemory Memory, IAalOutput AudioOut, AudioRendererParameter Params)
m_Commands = new Dictionary<int, ServiceProcessRequest>()
{ 4, RequestUpdateAudioRenderer },
{ 5, StartAudioRenderer },
{ 6, StopAudioRenderer },
{ 7, QuerySystemEvent }
UpdateEvent = new KEvent();
this.Memory = Memory;
this.AudioOut = AudioOut;
this.Params = Params;
Track = AudioOut.OpenTrack(
MemoryPools = CreateArray<MemoryPoolContext>(Params.EffectCount + Params.VoiceCount * 4);
Voices = CreateArray<VoiceContext>(Params.VoiceCount);
private void AudioCallback()
//Allocates an array with Size elements where every slot is filled
//with its own "new T()" instance, instead of being left as default(T).
private static T[] CreateArray<T>(int Size) where T : new()
{
    T[] Items = new T[Size];

    int Slot = 0;

    while (Slot < Items.Length)
    {
        Items[Slot++] = new T();
    }

    return Items;
}
private void InitializeAudioOut()
public long RequestUpdateAudioRenderer(ServiceCtx Context)
long OutputPosition = Context.Request.ReceiveBuff[0].Position;
long OutputSize = Context.Request.ReceiveBuff[0].Size;
AMemoryHelper.FillWithZeros(Context.Memory, OutputPosition, (int)OutputSize);
long InputPosition = Context.Request.SendBuff[0].Position;
StructReader Reader = new StructReader(Context.Memory, InputPosition);
StructWriter Writer = new StructWriter(Context.Memory, OutputPosition);
UpdateDataHeader InputHeader = Reader.Read<UpdateDataHeader>();
MemoryPoolIn[] MemoryPoolsIn = Reader.Read<MemoryPoolIn>(InputHeader.MemoryPoolSize);
for (int Index = 0; Index < MemoryPoolsIn.Length; Index++)
MemoryPoolIn MemoryPool = MemoryPoolsIn[Index];
if (MemoryPool.State == MemoryPoolState.RequestAttach)
MemoryPools[Index].OutStatus.State = MemoryPoolState.Attached;
else if (MemoryPool.State == MemoryPoolState.RequestDetach)
MemoryPools[Index].OutStatus.State = MemoryPoolState.Detached;
VoiceIn[] VoicesIn = Reader.Read<VoiceIn>(InputHeader.VoiceSize);
for (int Index = 0; Index < VoicesIn.Length; Index++)
VoiceIn Voice = VoicesIn[Index];
VoiceContext VoiceCtx = Voices[Index];
VoiceCtx.SetAcquireState(Voice.Acquired != 0);
if (Voice.Acquired == 0)
if (Voice.FirstUpdate != 0)
VoiceCtx.AdpcmCtx = GetAdpcmDecoderContext(
VoiceCtx.SampleFormat = Voice.SampleFormat;
VoiceCtx.SampleRate = Voice.SampleRate;
VoiceCtx.ChannelsCount = Voice.ChannelsCount;
VoiceCtx.WaveBuffers[0] = Voice.WaveBuffer0;
VoiceCtx.WaveBuffers[1] = Voice.WaveBuffer1;
VoiceCtx.WaveBuffers[2] = Voice.WaveBuffer2;
VoiceCtx.WaveBuffers[3] = Voice.WaveBuffer3;
VoiceCtx.Volume = Voice.Volume;
VoiceCtx.PlayState = Voice.PlayState;
UpdateDataHeader OutputHeader = new UpdateDataHeader();
int UpdateHeaderSize = Marshal.SizeOf<UpdateDataHeader>();
OutputHeader.Revision = IAudioRendererManager.RevMagic;
OutputHeader.BehaviorSize = 0xb0;
OutputHeader.MemoryPoolSize = (Params.EffectCount + Params.VoiceCount * 4) * 0x10;
OutputHeader.VoiceSize = Params.VoiceCount * 0x10;
OutputHeader.EffectSize = Params.EffectCount * 0x10;
OutputHeader.SinkSize = Params.SinkCount * 0x20;
OutputHeader.PerformanceManagerSize = 0x10;
OutputHeader.TotalSize = UpdateHeaderSize +
OutputHeader.BehaviorSize +
OutputHeader.MemoryPoolSize +
OutputHeader.VoiceSize +
OutputHeader.EffectSize +
OutputHeader.SinkSize +
foreach (MemoryPoolContext MemoryPool in MemoryPools)
foreach (VoiceContext Voice in Voices)
return 0;
public long StartAudioRenderer(ServiceCtx Context)
Context.Ns.Log.PrintStub(LogClass.ServiceAudio, "Stubbed.");
return 0;
public long StopAudioRenderer(ServiceCtx Context)
Context.Ns.Log.PrintStub(LogClass.ServiceAudio, "Stubbed.");
return 0;
public long QuerySystemEvent(ServiceCtx Context)
int Handle = Context.Process.HandleTable.OpenHandle(UpdateEvent);
Context.Response.HandleDesc = IpcHandleDesc.MakeCopy(Handle);
return 0;
//Reads the ADPCM coefficients table located at Position on Memory
//(Size bytes long) and stores it on a new decoder context.
//Returns null when Size is zero.
private AdpcmDecoderContext GetAdpcmDecoderContext(long Position, long Size)
{
    if (Size == 0)
    {
        return null;
    }

    AdpcmDecoderContext Context = new AdpcmDecoderContext();

    //Each coefficient is a 16-bits value, so the table holds Size / 2 entries.
    Context.Coefficients = new short[Size >> 1];

    //The loop is bounded by the amount of coefficients rather than by the
    //size in bytes. With an odd Size, a byte based bound would do one extra
    //iteration and write past the end of the array; the trailing byte can't
    //hold a whole coefficient, so it is ignored.
    for (int Index = 0; Index < Context.Coefficients.Length; Index++)
    {
        Context.Coefficients[Index] = Memory.ReadInt16(Position + Index * 2);
    }

    return Context;
}
private void UpdateAudio()
long[] Released = AudioOut.GetReleasedBuffers(Track, 2);
for (int Index = 0; Index < Released.Length; Index++)
private void AppendMixedBuffer(long Tag)
int[] MixBuffer = new int[MixBufferSamplesCount * AudioConsts.HostChannelsCount];
foreach (VoiceContext Voice in Voices)
if (!Voice.Playing)
int OutOffset = 0;
int PendingSamples = MixBufferSamplesCount;
while (PendingSamples > 0)
int[] Samples = Voice.GetBufferData(Memory, PendingSamples, out int ReturnedSamples);
if (ReturnedSamples == 0)
PendingSamples -= ReturnedSamples;
for (int Offset = 0; Offset < Samples.Length; Offset++)
int Sample = (int)(Samples[Offset] * Voice.Volume);
MixBuffer[OutOffset++] += Sample;
AudioOut.AppendBuffer(Track, Tag, GetFinalBuffer(MixBuffer));
//Builds the 16-bits buffer that is appended to the audio output from the
//32-bits mix buffer, passing each value through DspUtils.Saturate.
private static short[] GetFinalBuffer(int[] Buffer)
{
    short[] Converted = new short[Buffer.Length];

    int Position = 0;

    foreach (int Sample in Buffer)
    {
        Converted[Position++] = DspUtils.Saturate(Sample);
    }

    return Converted;
}
public void Dispose()
protected virtual void Dispose(bool Disposing)
if (Disposing)
@ -0,0 +1,12 @@
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
class MemoryPoolContext
public MemoryPoolOut OutStatus;
public MemoryPoolContext()
OutStatus.State = MemoryPoolState.Detached;
Normal file
Normal file
@ -0,0 +1,14 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
[StructLayout(LayoutKind.Sequential, Size = 0x20, Pack = 4)]
struct MemoryPoolIn
public long Address;
public long Size;
public MemoryPoolState State;
public int Unknown14;
public long Unknown18;
@ -0,0 +1,12 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
//Memory pool status entry (MemoryPoolContext keeps one as OutStatus).
//The layout is sequential, with a total size of 0x10 bytes.
//NOTE(review): with this layout the two fields after State sit at offsets
//0x4 and 0x8, so the 14/18 suffixes on their names do not look like offsets
//into this struct (they match the field names used on MemoryPoolIn) - confirm.
[StructLayout(LayoutKind.Sequential, Size = 0x10, Pack = 4)]
struct MemoryPoolOut
public MemoryPoolState State;
public int Unknown14;
public long Unknown18;
@ -1,4 +1,4 @@
namespace Ryujinx.HLE.OsHle.Services.Aud
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
enum MemoryPoolState : int
@ -0,0 +1,9 @@
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
enum PlayState : byte
Playing = 0,
Stopped = 1,
Paused = 2
Normal file
Normal file
@ -0,0 +1,191 @@
using System;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
static class Resampler
#region "LookUp Tables"
private static short[] CurveLut0 = new short[]
6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, 19412, 7093, 22,
6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, 7472, 41, 5773, 19361, 7600, 48,
5659, 19342, 7728, 55, 5546, 19321, 7857, 62, 5434, 19298, 7987, 69, 5323, 19273, 8118, 77,
5213, 19245, 8249, 84, 5104, 19215, 8381, 92, 4997, 19183, 8513, 101, 4890, 19148, 8646, 109,
4785, 19112, 8780, 118, 4681, 19073, 8914, 127, 4579, 19031, 9048, 137, 4477, 18988, 9183, 147,
4377, 18942, 9318, 157, 4277, 18895, 9454, 168, 4179, 18845, 9590, 179, 4083, 18793, 9726, 190,
3987, 18738, 9863, 202, 3893, 18682, 10000, 215, 3800, 18624, 10137, 228, 3709, 18563, 10274, 241,
3618, 18500, 10411, 255, 3529, 18436, 10549, 270, 3441, 18369, 10687, 285, 3355, 18300, 10824, 300,
3269, 18230, 10962, 317, 3186, 18157, 11100, 334, 3103, 18082, 11238, 351, 3022, 18006, 11375, 369,
2942, 17927, 11513, 388, 2863, 17847, 11650, 408, 2785, 17765, 11788, 428, 2709, 17681, 11925, 449,
2635, 17595, 12062, 471, 2561, 17507, 12198, 494, 2489, 17418, 12334, 517, 2418, 17327, 12470, 541,
2348, 17234, 12606, 566, 2280, 17140, 12741, 592, 2213, 17044, 12876, 619, 2147, 16946, 13010, 647,
2083, 16846, 13144, 675, 2020, 16745, 13277, 704, 1958, 16643, 13409, 735, 1897, 16539, 13541, 766,
1838, 16434, 13673, 798, 1780, 16327, 13803, 832, 1723, 16218, 13933, 866, 1667, 16109, 14062, 901,
1613, 15998, 14191, 937, 1560, 15885, 14318, 975, 1508, 15772, 14445, 1013, 1457, 15657, 14571, 1052,
1407, 15540, 14695, 1093, 1359, 15423, 14819, 1134, 1312, 15304, 14942, 1177, 1266, 15185, 15064, 1221,
1221, 15064, 15185, 1266, 1177, 14942, 15304, 1312, 1134, 14819, 15423, 1359, 1093, 14695, 15540, 1407,
1052, 14571, 15657, 1457, 1013, 14445, 15772, 1508, 975, 14318, 15885, 1560, 937, 14191, 15998, 1613,
901, 14062, 16109, 1667, 866, 13933, 16218, 1723, 832, 13803, 16327, 1780, 798, 13673, 16434, 1838,
766, 13541, 16539, 1897, 735, 13409, 16643, 1958, 704, 13277, 16745, 2020, 675, 13144, 16846, 2083,
647, 13010, 16946, 2147, 619, 12876, 17044, 2213, 592, 12741, 17140, 2280, 566, 12606, 17234, 2348,
541, 12470, 17327, 2418, 517, 12334, 17418, 2489, 494, 12198, 17507, 2561, 471, 12062, 17595, 2635,
449, 11925, 17681, 2709, 428, 11788, 17765, 2785, 408, 11650, 17847, 2863, 388, 11513, 17927, 2942,
369, 11375, 18006, 3022, 351, 11238, 18082, 3103, 334, 11100, 18157, 3186, 317, 10962, 18230, 3269,
300, 10824, 18300, 3355, 285, 10687, 18369, 3441, 270, 10549, 18436, 3529, 255, 10411, 18500, 3618,
241, 10274, 18563, 3709, 228, 10137, 18624, 3800, 215, 10000, 18682, 3893, 202, 9863, 18738, 3987,
190, 9726, 18793, 4083, 179, 9590, 18845, 4179, 168, 9454, 18895, 4277, 157, 9318, 18942, 4377,
147, 9183, 18988, 4477, 137, 9048, 19031, 4579, 127, 8914, 19073, 4681, 118, 8780, 19112, 4785,
109, 8646, 19148, 4890, 101, 8513, 19183, 4997, 92, 8381, 19215, 5104, 84, 8249, 19245, 5213,
77, 8118, 19273, 5323, 69, 7987, 19298, 5434, 62, 7857, 19321, 5546, 55, 7728, 19342, 5659,
48, 7600, 19361, 5773, 41, 7472, 19377, 5888, 34, 7345, 19391, 6004, 28, 7219, 19403, 6121,
22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, 6479, 3, 6722, 19426, 6600
private static short[] CurveLut1 = new short[]
-68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, 32586, 512, -36,
-568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, 1000, -69, -891, 32393, 1174, -80,
-990, 32323, 1352, -92, -1084, 32244, 1536, -103, -1173, 32157, 1724, -115, -1258, 32061, 1919, -128,
-1338, 31956, 2118, -140, -1414, 31844, 2322, -153, -1486, 31723, 2532, -167, -1554, 31593, 2747, -180,
-1617, 31456, 2967, -194, -1676, 31310, 3192, -209, -1732, 31157, 3422, -224, -1783, 30995, 3657, -240,
-1830, 30826, 3897, -256, -1874, 30649, 4143, -272, -1914, 30464, 4393, -289, -1951, 30272, 4648, -307,
-1984, 30072, 4908, -325, -2014, 29866, 5172, -343, -2040, 29652, 5442, -362, -2063, 29431, 5716, -382,
-2083, 29203, 5994, -403, -2100, 28968, 6277, -424, -2114, 28727, 6565, -445, -2125, 28480, 6857, -468,
-2133, 28226, 7153, -490, -2139, 27966, 7453, -514, -2142, 27700, 7758, -538, -2142, 27428, 8066, -563,
-2141, 27151, 8378, -588, -2136, 26867, 8694, -614, -2130, 26579, 9013, -641, -2121, 26285, 9336, -668,
-2111, 25987, 9663, -696, -2098, 25683, 9993, -724, -2084, 25375, 10326, -753, -2067, 25063, 10662, -783,
-2049, 24746, 11000, -813, -2030, 24425, 11342, -844, -2009, 24100, 11686, -875, -1986, 23771, 12033, -907,
-1962, 23438, 12382, -939, -1937, 23103, 12733, -972, -1911, 22764, 13086, -1005, -1883, 22422, 13441, -1039,
-1855, 22077, 13798, -1072, -1825, 21729, 14156, -1107, -1795, 21380, 14516, -1141, -1764, 21027, 14877, -1176,
-1732, 20673, 15239, -1211, -1700, 20317, 15602, -1246, -1667, 19959, 15965, -1282, -1633, 19600, 16329, -1317,
-1599, 19239, 16694, -1353, -1564, 18878, 17058, -1388, -1530, 18515, 17423, -1424, -1495, 18151, 17787, -1459,
-1459, 17787, 18151, -1495, -1424, 17423, 18515, -1530, -1388, 17058, 18878, -1564, -1353, 16694, 19239, -1599,
-1317, 16329, 19600, -1633, -1282, 15965, 19959, -1667, -1246, 15602, 20317, -1700, -1211, 15239, 20673, -1732,
-1176, 14877, 21027, -1764, -1141, 14516, 21380, -1795, -1107, 14156, 21729, -1825, -1072, 13798, 22077, -1855,
-1039, 13441, 22422, -1883, -1005, 13086, 22764, -1911, -972, 12733, 23103, -1937, -939, 12382, 23438, -1962,
-907, 12033, 23771, -1986, -875, 11686, 24100, -2009, -844, 11342, 24425, -2030, -813, 11000, 24746, -2049,
-783, 10662, 25063, -2067, -753, 10326, 25375, -2084, -724, 9993, 25683, -2098, -696, 9663, 25987, -2111,
-668, 9336, 26285, -2121, -641, 9013, 26579, -2130, -614, 8694, 26867, -2136, -588, 8378, 27151, -2141,
-563, 8066, 27428, -2142, -538, 7758, 27700, -2142, -514, 7453, 27966, -2139, -490, 7153, 28226, -2133,
-468, 6857, 28480, -2125, -445, 6565, 28727, -2114, -424, 6277, 28968, -2100, -403, 5994, 29203, -2083,
-382, 5716, 29431, -2063, -362, 5442, 29652, -2040, -343, 5172, 29866, -2014, -325, 4908, 30072, -1984,
-307, 4648, 30272, -1951, -289, 4393, 30464, -1914, -272, 4143, 30649, -1874, -256, 3897, 30826, -1830,
-240, 3657, 30995, -1783, -224, 3422, 31157, -1732, -209, 3192, 31310, -1676, -194, 2967, 31456, -1617,
-180, 2747, 31593, -1554, -167, 2532, 31723, -1486, -153, 2322, 31844, -1414, -140, 2118, 31956, -1338,
-128, 1919, 32061, -1258, -115, 1724, 32157, -1173, -103, 1536, 32244, -1084, -92, 1352, 32323, -990,
-80, 1174, 32393, -891, -69, 1000, 32454, -788, -58, 832, 32507, -680, -47, 669, 32551, -568,
-36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, -200, -5, 69, 32639, -68
private static short[] CurveLut2 = new short[]
3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, 26253, 3751, -42,
2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, 4199, -54, 2338, 26130, 4354, -58,
2227, 26085, 4512, -63, 2120, 26035, 4673, -67, 2015, 25980, 4837, -72, 1912, 25919, 5004, -76,
1813, 25852, 5174, -81, 1716, 25780, 5347, -87, 1622, 25704, 5522, -92, 1531, 25621, 5701, -98,
1442, 25533, 5882, -103, 1357, 25440, 6066, -109, 1274, 25342, 6253, -115, 1193, 25239, 6442, -121,
1115, 25131, 6635, -127, 1040, 25018, 6830, -133, 967, 24899, 7027, -140, 897, 24776, 7227, -146,
829, 24648, 7430, -153, 764, 24516, 7635, -159, 701, 24379, 7842, -166, 641, 24237, 8052, -174,
583, 24091, 8264, -181, 526, 23940, 8478, -187, 472, 23785, 8695, -194, 420, 23626, 8914, -202,
371, 23462, 9135, -209, 324, 23295, 9358, -215, 279, 23123, 9583, -222, 236, 22948, 9809, -230,
194, 22769, 10038, -237, 154, 22586, 10269, -243, 117, 22399, 10501, -250, 81, 22208, 10735, -258,
47, 22015, 10970, -265, 15, 21818, 11206, -271, -16, 21618, 11444, -277, -44, 21415, 11684, -283,
-71, 21208, 11924, -290, -97, 20999, 12166, -296, -121, 20786, 12409, -302, -143, 20571, 12653, -306,
-163, 20354, 12898, -311, -183, 20134, 13143, -316, -201, 19911, 13389, -321, -218, 19686, 13635, -325,
-234, 19459, 13882, -328, -248, 19230, 14130, -332, -261, 18998, 14377, -335, -273, 18765, 14625, -337,
-284, 18531, 14873, -339, -294, 18295, 15121, -341, -302, 18057, 15369, -341, -310, 17817, 15617, -341,
-317, 17577, 15864, -340, -323, 17335, 16111, -340, -328, 17092, 16357, -338, -332, 16848, 16603, -336,
-336, 16603, 16848, -332, -338, 16357, 17092, -328, -340, 16111, 17335, -323, -340, 15864, 17577, -317,
-341, 15617, 17817, -310, -341, 15369, 18057, -302, -341, 15121, 18295, -294, -339, 14873, 18531, -284,
-337, 14625, 18765, -273, -335, 14377, 18998, -261, -332, 14130, 19230, -248, -328, 13882, 19459, -234,
-325, 13635, 19686, -218, -321, 13389, 19911, -201, -316, 13143, 20134, -183, -311, 12898, 20354, -163,
-306, 12653, 20571, -143, -302, 12409, 20786, -121, -296, 12166, 20999, -97, -290, 11924, 21208, -71,
-283, 11684, 21415, -44, -277, 11444, 21618, -16, -271, 11206, 21818, 15, -265, 10970, 22015, 47,
-258, 10735, 22208, 81, -250, 10501, 22399, 117, -243, 10269, 22586, 154, -237, 10038, 22769, 194,
-230, 9809, 22948, 236, -222, 9583, 23123, 279, -215, 9358, 23295, 324, -209, 9135, 23462, 371,
-202, 8914, 23626, 420, -194, 8695, 23785, 472, -187, 8478, 23940, 526, -181, 8264, 24091, 583,
-174, 8052, 24237, 641, -166, 7842, 24379, 701, -159, 7635, 24516, 764, -153, 7430, 24648, 829,
-146, 7227, 24776, 897, -140, 7027, 24899, 967, -133, 6830, 25018, 1040, -127, 6635, 25131, 1115,
-121, 6442, 25239, 1193, -115, 6253, 25342, 1274, -109, 6066, 25440, 1357, -103, 5882, 25533, 1442,
-98, 5701, 25621, 1531, -92, 5522, 25704, 1622, -87, 5347, 25780, 1716, -81, 5174, 25852, 1813,
-76, 5004, 25919, 1912, -72, 4837, 25980, 2015, -67, 4673, 26035, 2120, -63, 4512, 26085, 2227,
-58, 4354, 26130, 2338, -54, 4199, 26169, 2451, -50, 4046, 26202, 2568, -46, 3897, 26230, 2688,
-42, 3751, 26253, 2811, -38, 3608, 26270, 2936, -34, 3467, 26281, 3064, -32, 3329, 26287, 3195
//Resamples interleaved 2 channels audio from SrcSampleRate to DstSampleRate.
//Each output frame is the sum of 4 consecutive input frames multiplied by
//4 coefficients taken from one of the lookup tables.
//Buffer holds the input, indexed as Frame * 2 + Channel.
//SamplesCount is the amount of input frames used to size the output.
//FracPart is the fractional read position (15 bits); it is read on entry
//and updated after every output frame, so the caller can carry it over
//to the next call.
//Returns a new interleaved buffer with (int)(SamplesCount / Ratio) frames.
//Throws ArgumentNullException when Buffer is null, and
//ArgumentOutOfRangeException when one of the sample rates is not positive.
//NOTE(review): every output frame reads input frames InOffs + 0 up to
//InOffs + 3, and the Buffer length is not checked here, so Buffer presumably
//needs to hold extra frames past SamplesCount - confirm against the caller.
public static int[] Resample2Ch(
int[] Buffer,
int SrcSampleRate,
int DstSampleRate,
int SamplesCount,
ref int FracPart)
if (Buffer == null)
throw new ArgumentNullException(nameof(Buffer));
if (SrcSampleRate <= 0)
throw new ArgumentOutOfRangeException(nameof(SrcSampleRate));
if (DstSampleRate <= 0)
throw new ArgumentOutOfRangeException(nameof(DstSampleRate));
double Ratio = (double)SrcSampleRate / DstSampleRate;
int NewSamplesCount = (int)(SamplesCount / Ratio);
//Step is how much the input position advances per output frame, as a
//fixed point value with 15 fractional bits (0x8000 is exactly one frame).
int Step = (int)(Ratio * 0x8000);
int[] Output = new int[NewSamplesCount * 2];
short[] Lut;
//The coefficients table is picked from the step size.
//NOTE(review): as listed here, the CurveLut2 assignment has no "else"
//before it, so it would override both branches - confirm against the
//complete file before relying on this selection.
if (Step > 0xaaaa)
Lut = CurveLut0;
else if (Step <= 0x8000)
Lut = CurveLut1;
Lut = CurveLut2;
int InOffs = 0;
for (int OutOffs = 0; OutOffs < Output.Length; OutOffs += 2)
//With FracPart on the 0-0x7fff range kept by the mask below,
//FracPart >> 8 is a value from 0 to 127 that selects a row with
//4 coefficients on the table.
int LutIndex = (FracPart >> 8) * 4;
int Sample0 = Buffer[(InOffs + 0) * 2 + 0] * Lut[LutIndex + 0] +
Buffer[(InOffs + 1) * 2 + 0] * Lut[LutIndex + 1] +
Buffer[(InOffs + 2) * 2 + 0] * Lut[LutIndex + 2] +
Buffer[(InOffs + 3) * 2 + 0] * Lut[LutIndex + 3];
int Sample1 = Buffer[(InOffs + 0) * 2 + 1] * Lut[LutIndex + 0] +
Buffer[(InOffs + 1) * 2 + 1] * Lut[LutIndex + 1] +
Buffer[(InOffs + 2) * 2 + 1] * Lut[LutIndex + 2] +
Buffer[(InOffs + 3) * 2 + 1] * Lut[LutIndex + 3];
//Advances the read position: the integer part of the sum moves InOffs,
//and the low 15 bits are kept on FracPart.
int NewOffset = FracPart + Step;
InOffs += NewOffset >> 15;
FracPart = NewOffset & 0x7fff;
//Scales the accumulated products back down by 15 bits.
Output[OutOffs + 0] = Sample0 >> 15;
Output[OutOffs + 1] = Sample1 >> 15;
return Output;
@ -1,15 +1,15 @@
namespace Ryujinx.HLE.OsHle.Services.Aud
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
struct UpdateDataHeader
public int Revision;
public int BehaviorSize;
public int MemoryPoolsSize;
public int VoicesSize;
public int MemoryPoolSize;
public int VoiceSize;
public int VoiceResourceSize;
public int EffectsSize;
public int MixesSize;
public int SinksSize;
public int EffectSize;
public int MixeSize;
public int SinkSize;
public int PerformanceManagerSize;
public int Unknown24;
public int Unknown28;
@ -0,0 +1,10 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
[StructLayout(LayoutKind.Sequential, Size = 0x70, Pack = 1)]
struct VoiceChannelResourceIn
Normal file
Normal file
@ -0,0 +1,188 @@
using ChocolArm64.Memory;
using Ryujinx.Audio.Adpcm;
using System;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
class VoiceContext
private bool Acquired;
private bool BufferReload;
private int ResamplerFracPart;
private int BufferIndex;
private int Offset;
public int SampleRate;
public int ChannelsCount;
public float Volume;
public PlayState PlayState;
public SampleFormat SampleFormat;
public AdpcmDecoderContext AdpcmCtx;
public WaveBuffer[] WaveBuffers;
public VoiceOut OutStatus;
private int[] Samples;
public bool Playing => Acquired && PlayState == PlayState.Playing;
public VoiceContext()
WaveBuffers = new WaveBuffer[4];
public void SetAcquireState(bool NewState)
if (Acquired && !NewState)
Acquired = NewState;
private void Reset()
BufferReload = true;
BufferIndex = 0;
Offset = 0;
OutStatus.PlayedSamplesCount = 0;
OutStatus.PlayedWaveBuffersCount = 0;
OutStatus.VoiceDropsCount = 0;
public int[] GetBufferData(AMemory Memory, int MaxSamples, out int SamplesCount)
if (!Playing)
SamplesCount = 0;
return null;
if (BufferReload)
BufferReload = false;
WaveBuffer Wb = WaveBuffers[BufferIndex];
int MaxSize = Samples.Length - Offset;
int Size = MaxSamples * AudioConsts.HostChannelsCount;
if (Size > MaxSize)
Size = MaxSize;
int[] Output = new int[Size];
Array.Copy(Samples, Offset, Output, 0, Size);
SamplesCount = Size / AudioConsts.HostChannelsCount;
OutStatus.PlayedSamplesCount += SamplesCount;
Offset += Size;
if (Offset == Samples.Length)
Offset = 0;
if (Wb.Looping == 0)
SetBufferIndex((BufferIndex + 1) & 3);
if (Wb.LastBuffer != 0)
PlayState = PlayState.Paused;
return Output;
private void UpdateBuffer(AMemory Memory)
//TODO: Implement conversion for formats other
//than interleaved stereo (2 channels).
//As of now, it assumes that HostChannelsCount == 2.
WaveBuffer Wb = WaveBuffers[BufferIndex];
if (SampleFormat == SampleFormat.PcmInt16)
int SamplesCount = (int)(Wb.Size / (sizeof(short) * ChannelsCount));
Samples = new int[SamplesCount * AudioConsts.HostChannelsCount];
if (ChannelsCount == 1)
for (int Index = 0; Index < SamplesCount; Index++)
short Sample = Memory.ReadInt16(Wb.Position + Index * 2);
Samples[Index * 2 + 0] = Sample;
Samples[Index * 2 + 1] = Sample;
for (int Index = 0; Index < SamplesCount * 2; Index++)
Samples[Index] = Memory.ReadInt16(Wb.Position + Index * 2);
else if (SampleFormat == SampleFormat.Adpcm)
byte[] Buffer = Memory.ReadBytes(Wb.Position, Wb.Size);
Samples = AdpcmDecoder.Decode(Buffer, AdpcmCtx);
throw new InvalidOperationException();
if (SampleRate != AudioConsts.HostSampleRate)
//TODO: We should keep the frames being discarded (see the 4 below)
//on a buffer and include it on the next samples buffer, to allow
//the resampler to do seamless interpolation between wave buffers.
int SamplesCount = Samples.Length / AudioConsts.HostChannelsCount;
SamplesCount = Math.Max(SamplesCount - 4, 0);
Samples = Resampler.Resample2Ch(
ref ResamplerFracPart);
public void SetBufferIndex(int Index)
BufferIndex = Index & 3;
BufferReload = true;
Normal file
Normal file
@ -0,0 +1,49 @@
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.OsHle.Services.Aud.AudioRenderer
[StructLayout(LayoutKind.Sequential, Size = 0x170, Pack = 1)]
struct VoiceIn
public int VoiceSlot;
public int NodeId;
public byte FirstUpdate;
public byte Acquired;
public PlayState PlayState;
public SampleFormat SampleFormat;
public int SampleRate;
public int Priority;
public int Unknown14;
public int ChannelsCount;
public float Pitch;
public float Volume;
public BiquadFilter BiquadFilter0;
public BiquadFilter BiquadFilter1;
public int AppendedWaveBuffersCount;
public int BaseWaveBufferIndex;
public int Unknown44;
public long AdpcmCoeffsPosition;
public long AdpcmCoeffsSize;
public int VoiceDestination;
public int Padding;
public WaveBuffer WaveBuffer0;
public WaveBuffer WaveBuffer1;
public WaveBuffer WaveBuffer2;
public WaveBuffer WaveBuffer3;
Some files were not shown because too many files have changed in this diff Show more
Reference in a new issue