early-access version 1279

This commit is contained in:
pineappleEA 2021-01-02 22:47:26 +01:00
parent 84d5e05316
commit f7b2c59575
35 changed files with 399 additions and 112 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 1277. This is the source code for early-access 1279.
## Legal Notice ## Legal Notice

View file

@ -39,6 +39,9 @@ enum class OptimizationFlag : std::uint32_t {
/// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions. /// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
/// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows. /// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
Unsafe_ReducedErrorFP = 0x00020000, Unsafe_ReducedErrorFP = 0x00020000,
/// This is an UNSAFE optimization that causes floating-point instructions to not produce correct NaNs.
/// This may also result in inaccurate results when instructions are given certain special values.
Unsafe_InaccurateNaN = 0x00040000,
}; };
constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0); constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);

View file

@ -1080,29 +1080,40 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto& carry_in = args[2]; auto& carry_in = args[2];
const bool is_cmp = inst->UseCount() == size_t(!!carry_inst + !!overflow_inst + !!nzcv_inst) && carry_in.IsImmediate() && carry_in.GetImmediateU1();
const Xbyak::Reg64 nzcv = DoNZCV(code, ctx.reg_alloc, nzcv_inst); const Xbyak::Reg64 nzcv = DoNZCV(code, ctx.reg_alloc, nzcv_inst);
const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bitsize); const Xbyak::Reg result = (is_cmp ? ctx.reg_alloc.UseGpr(args[0]) : ctx.reg_alloc.UseScratchGpr(args[0])).changeBit(bitsize);
const Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst); const Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst);
const Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1}; const Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1};
// TODO: Consider using LEA. // TODO: Consider using LEA.
// TODO: Optimize CMP case.
// Note that x64 CF is inverse of what the ARM carry flag is here. // Note that x64 CF is inverse of what the ARM carry flag is here.
bool invert_output_carry = true;
if (is_cmp) {
if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) {
const u32 op_arg = args[1].GetImmediateU32();
code.cmp(result, op_arg);
} else {
OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
op_arg.setBit(bitsize);
code.cmp(result, *op_arg);
}
} else if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) {
const u32 op_arg = args[1].GetImmediateU32(); const u32 op_arg = args[1].GetImmediateU32();
if (carry_in.IsImmediate()) { if (carry_in.IsImmediate()) {
if (carry_in.GetImmediateU1()) { if (carry_in.GetImmediateU1()) {
code.sub(result, op_arg); code.sub(result, op_arg);
} else { } else {
code.stc(); code.add(result, ~op_arg);
code.sbb(result, op_arg); invert_output_carry = false;
} }
} else { } else {
code.bt(carry.cvt32(), 0); code.bt(carry.cvt32(), 0);
code.cmc(); code.adc(result, ~op_arg);
code.sbb(result, op_arg); invert_output_carry = false;
} }
} else { } else {
OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
@ -1122,14 +1133,20 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit
} }
if (nzcv_inst) { if (nzcv_inst) {
if (invert_output_carry) {
code.cmc(); code.cmc();
}
code.lahf(); code.lahf();
code.seto(code.al); code.seto(code.al);
ctx.reg_alloc.DefineValue(nzcv_inst, nzcv); ctx.reg_alloc.DefineValue(nzcv_inst, nzcv);
ctx.EraseInstruction(nzcv_inst); ctx.EraseInstruction(nzcv_inst);
} }
if (carry_inst) { if (carry_inst) {
if (invert_output_carry) {
code.setnc(carry); code.setnc(carry);
} else {
code.setc(carry);
}
ctx.reg_alloc.DefineValue(carry_inst, carry); ctx.reg_alloc.DefineValue(carry_inst, carry);
ctx.EraseInstruction(carry_inst); ctx.EraseInstruction(carry_inst);
} }
@ -1138,8 +1155,9 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit
ctx.reg_alloc.DefineValue(overflow_inst, overflow); ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.EraseInstruction(overflow_inst); ctx.EraseInstruction(overflow_inst);
} }
if (!is_cmp) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
}
} }
void EmitX64::EmitSub32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitSub32(EmitContext& ctx, IR::Inst* inst) {

View file

@ -257,7 +257,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
if (!ctx.FPCR().DN()) { if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
end = ProcessNaN<fsize>(code, result); end = ProcessNaN<fsize>(code, result);
} }
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
@ -265,7 +265,9 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
} else { } else {
fn(result); fn(result);
} }
if (ctx.FPCR().DN()) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
// Do nothing
} else if (ctx.FPCR().DN()) {
ForceToDefaultNaN<fsize>(code, result); ForceToDefaultNaN<fsize>(code, result);
} else { } else {
PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm()); PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
@ -281,6 +283,20 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.*fn)(result, operand);
} else {
fn(result, operand);
}
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (ctx.FPCR().DN()) { if (ctx.FPCR().DN()) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
@ -590,9 +606,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
if (code.HasFMA()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
FCODE(vfmadd231s)(result, operand2, operand3);
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (code.HasFMA()) {
Xbyak::Label end, fallback; Xbyak::Label end, fallback;
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
@ -641,8 +668,6 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
} }
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
@ -810,6 +835,22 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Label end, fallback;
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
FCODE(vfnmadd231s)(result, operand1, operand2);
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (code.HasFMA()) { if (code.HasFMA()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -998,6 +1039,21 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
using FPT = mp::unsigned_integer_of_size<fsize>; using FPT = mp::unsigned_integer_of_size<fsize>;
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
code.vmovaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>()));
FCODE(vfnmadd231s)(result, operand1, operand2);
FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>()));
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (code.HasFMA() && code.HasAVX()) { if (code.HasFMA() && code.HasAVX()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -290,7 +290,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN()) { if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
Xbyak::Xmm result; Xbyak::Xmm result;
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
@ -306,7 +306,9 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
}); });
} }
if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result); ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
}
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
return; return;
@ -342,7 +344,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1(); const bool fpcr_controlled = args[2].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN()) { if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
@ -356,7 +358,9 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
}); });
} }
if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a); ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
}
ctx.reg_alloc.DefineValue(inst, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a);
return; return;
@ -985,11 +989,23 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
}; };
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
if (code.HasFMA() && code.HasAVX()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
FCODE(vfmadd231p)(result, xmm_b, xmm_c);
});
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (code.HasFMA() && code.HasAVX()) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
@ -1025,8 +1041,6 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
} }
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
@ -1233,10 +1247,24 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
}; };
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
if (code.HasFMA() && code.HasAVX()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1(); const bool fpcr_controlled = args[2].GetImmediateU1();
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(vfnmadd231p)(result, operand1, operand2);
});
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (code.HasFMA() && code.HasAVX()) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@ -1269,8 +1297,6 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
} }
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@ -1428,10 +1454,25 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
}; };
if constexpr (fsize != 16) { if constexpr (fsize != 16) {
if (code.HasFMA() && code.HasAVX()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1(); const bool fpcr_controlled = args[2].GetImmediateU1();
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
FCODE(vfnmadd231p)(result, operand1, operand2);
FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
});
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (code.HasFMA() && code.HasAVX()) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@ -1470,8 +1511,6 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
} }
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();

View file

@ -27,7 +27,7 @@ bool TranslatorVisitor::CCMP_reg(bool sf, Reg Rm, Cond cond, Reg Rn, Imm<4> nzcv
const IR::U32U64 operand1 = X(datasize, Rn); const IR::U32U64 operand1 = X(datasize, Rn);
const IR::U32U64 operand2 = X(datasize, Rm); const IR::U32U64 operand2 = X(datasize, Rm);
const IR::NZCV then_flags = ir.NZCVFrom(ir.AddWithCarry(operand1, ir.Not(operand2), ir.Imm1(1))); const IR::NZCV then_flags = ir.NZCVFrom(ir.SubWithCarry(operand1, operand2, ir.Imm1(1)));
const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags)); const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags));
ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags)); ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags));
return true; return true;
@ -53,7 +53,7 @@ bool TranslatorVisitor::CCMP_imm(bool sf, Imm<5> imm5, Cond cond, Reg Rn, Imm<4>
const IR::U32U64 operand1 = X(datasize, Rn); const IR::U32U64 operand1 = X(datasize, Rn);
const IR::U32U64 operand2 = I(datasize, imm5.ZeroExtend<u32>()); const IR::U32U64 operand2 = I(datasize, imm5.ZeroExtend<u32>());
const IR::NZCV then_flags = ir.NZCVFrom(ir.AddWithCarry(operand1, ir.Not(operand2), ir.Imm1(1))); const IR::NZCV then_flags = ir.NZCVFrom(ir.SubWithCarry(operand1, operand2, ir.Imm1(1)));
const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags)); const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags));
ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags)); ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags));
return true; return true;

View file

@ -98,7 +98,6 @@ add_library(common STATIC
algorithm.h algorithm.h
alignment.h alignment.h
assert.h assert.h
atomic_ops.cpp
atomic_ops.h atomic_ops.h
detached_tasks.cpp detached_tasks.cpp
detached_tasks.h detached_tasks.h

View file

@ -4,14 +4,75 @@
#pragma once #pragma once
#include <cstring>
#include <memory>
#include "common/common_types.h" #include "common/common_types.h"
#if _MSC_VER
#include <intrin.h>
#endif
namespace Common { namespace Common {
[[nodiscard]] bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected); #if _MSC_VER
[[nodiscard]] bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected);
[[nodiscard]] bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected); [[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected); const u8 result =
[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected); _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
const u16 result =
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
const u32 result =
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
value[0],
reinterpret_cast<__int64*>(expected.data())) != 0;
}
#else
/// Atomically stores `value` into `*pointer` if it currently holds `expected`
/// (non-MSVC build, 8-bit).
/// @param pointer  Location to update.
/// @param value    Value to store when the comparison succeeds.
/// @param expected Value `*pointer` must hold for the store to take place.
/// @returns The result of the builtin: true if the store happened, false otherwise.
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
    // The builtin takes (ptr, oldval, newval), the reverse of this wrapper's (value, expected).
    const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
    return swapped;
}
/// Atomically stores `value` into `*pointer` if it currently holds `expected`
/// (non-MSVC build, 16-bit).
/// @param pointer  Location to update.
/// @param value    Value to store when the comparison succeeds.
/// @param expected Value `*pointer` must hold for the store to take place.
/// @returns The result of the builtin: true if the store happened, false otherwise.
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
    // The builtin takes (ptr, oldval, newval), the reverse of this wrapper's (value, expected).
    const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
    return swapped;
}
/// Atomically stores `value` into `*pointer` if it currently holds `expected`
/// (non-MSVC build, 32-bit).
/// @param pointer  Location to update.
/// @param value    Value to store when the comparison succeeds.
/// @param expected Value `*pointer` must hold for the store to take place.
/// @returns The result of the builtin: true if the store happened, false otherwise.
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
    // The builtin takes (ptr, oldval, newval), the reverse of this wrapper's (value, expected).
    const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
    return swapped;
}
/// Atomically stores `value` into `*pointer` if it currently holds `expected`
/// (non-MSVC build, 64-bit).
/// @param pointer  Location to update.
/// @param value    Value to store when the comparison succeeds.
/// @param expected Value `*pointer` must hold for the store to take place.
/// @returns The result of the builtin: true if the store happened, false otherwise.
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
    // The builtin takes (ptr, oldval, newval), the reverse of this wrapper's (value, expected).
    const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
    return swapped;
}
/// Atomically stores the 128-bit `value` at `pointer` if the 128 bits there equal `expected`
/// (non-MSVC build).
/// @param pointer  Start of the 128-bit location to update.
///                 NOTE(review): a 16-byte compare-and-swap presumably needs 16-byte alignment;
///                 confirm every caller passes suitably aligned storage.
/// @param value    Replacement bit pattern.
/// @param expected Bit pattern the location must hold for the store to take place.
/// @returns The result of the builtin: true if the store happened, false otherwise.
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
    // The copies below move sizeof(u128) bytes into an unsigned __int128, so the sizes must agree.
    static_assert(sizeof(u128) == sizeof(unsigned __int128),
                  "u128 must have the same size as unsigned __int128");

    unsigned __int128 value_a;
    unsigned __int128 expected_a;
    // memcpy reinterprets the bit patterns without type-punning through pointers.
    std::memcpy(&value_a, value.data(), sizeof(u128));
    std::memcpy(&expected_a, expected.data(), sizeof(u128));

    // Named cast that keeps the volatile qualifier instead of silently dropping it.
    // The builtin takes (ptr, oldval, newval), the reverse of this wrapper's (value, expected).
    return __sync_bool_compare_and_swap(reinterpret_cast<volatile unsigned __int128*>(pointer),
                                        expected_a, value_a);
}
#endif
} // namespace Common } // namespace Common

View file

@ -90,7 +90,7 @@ struct PageTable {
PageTable& operator=(PageTable&&) noexcept = default; PageTable& operator=(PageTable&&) noexcept = default;
/** /**
* Resizes the page table to be able to accomodate enough pages within * Resizes the page table to be able to accommodate enough pages within
* a given address space. * a given address space.
* *
* @param address_space_width_in_bits The address size width in bits. * @param address_space_width_in_bits The address size width in bits.

View file

@ -394,7 +394,7 @@ public:
template <typename S, typename T2, typename F2> template <typename S, typename T2, typename F2>
friend S operator%(const S& p, const swapped_t v); friend S operator%(const S& p, const swapped_t v);
// Arithmetics + assignements // Arithmetics + assignments
template <typename S, typename T2, typename F2> template <typename S, typename T2, typename F2>
friend S operator+=(const S& p, const swapped_t v); friend S operator+=(const S& p, const swapped_t v);
@ -451,7 +451,7 @@ S operator%(const S& i, const swap_struct_t<T, F> v) {
return i % v.swap(); return i % v.swap();
} }
// Arithmetics + assignements // Arithmetics + assignments
template <typename S, typename T, typename F> template <typename S, typename T, typename F>
S& operator+=(S& i, const swap_struct_t<T, F> v) { S& operator+=(S& i, const swap_struct_t<T, F> v) {
i += v.swap(); i += v.swap();

View file

@ -2,19 +2,74 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <array>
#include <chrono> #include <chrono>
#include <limits>
#include <mutex> #include <mutex>
#include <thread> #include <thread>
#ifdef _MSC_VER #ifdef _MSC_VER
#include <intrin.h> #include <intrin.h>
#pragma intrinsic(__umulh)
#pragma intrinsic(_udiv128)
#else #else
#include <x86intrin.h> #include <x86intrin.h>
#endif #endif
#include "common/atomic_ops.h"
#include "common/uint128.h" #include "common/uint128.h"
#include "common/x64/native_clock.h" #include "common/x64/native_clock.h"
namespace {
/// Computes the ratio numerator / divisor as a 64-bit fixed-point factor: the quotient of
/// (numerator * 2^64) by divisor, reduced to 64 bits.
/// @param numerator Dividend of the ratio.
/// @param divisor   Divisor of the ratio; no check for zero is performed here.
/// @returns The 64-bit factor.
/// NOTE(review): the exact quotient only fits in 64 bits when numerator < divisor. The __int128
/// branch keeps just the low 64 bits otherwise; behaviour of the other branches in that case
/// should be confirmed against the intrinsic/helper documentation.
[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
#ifdef __SIZEOF_INT128__
// Compiler provides a native 128-bit integer: shift the numerator into the upper 64 bits,
// divide, and keep the low 64 bits of the quotient.
const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
return static_cast<u64>(base / divisor);
#elif defined(_M_X64) || defined(_M_ARM64)
// r[1] (the numerator) is passed first and r[0] (zero) second; for _udiv128 these are the high
// and low halves of the 128-bit dividend, i.e. the dividend is numerator * 2^64.
std::array<u64, 2> r = {0, numerator};
// Receives the remainder from the division call; it is not read afterwards.
u64 remainder;
#if _MSC_VER < 1923
// NOTE(review): udiv128 is not declared in this file; presumably a project-provided stand-in
// for compilers that lack the _udiv128 intrinsic - confirm.
return udiv128(r[1], r[0], divisor, &remainder);
#else
return _udiv128(r[1], r[0], divisor, &remainder);
#endif
#else
// This one is a bit more inaccurate: it scales by the largest u64 value (2^64 - 1) rather
// than by 2^64.
// NOTE(review): MultiplyAndDivide64 is called unqualified from an anonymous namespace; confirm
// it is visible at this scope.
return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
#endif
}
/// Returns the upper 64 bits of the full 128-bit product of two unsigned 64-bit values.
/// @param a First factor.
/// @param b Second factor.
/// @returns Bits 64..127 of a * b.
[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
#ifdef __SIZEOF_INT128__
    // Native 128-bit arithmetic: widen, multiply, take the top half.
    const unsigned __int128 product = static_cast<unsigned __int128>(a) * b;
    return static_cast<u64>(product >> 64);
#elif defined(_M_X64) || defined(_M_ARM64)
    return __umulh(a, b); // MSVC
#else
    // Generic fallback: long multiplication on 32-bit halves.
    constexpr u64 low_mask = 0xFFFFFFFFULL;
    const u64 a_low = a & low_mask;
    const u64 a_high = a >> 32;
    const u64 b_low = b & low_mask;
    const u64 b_high = b >> 32;

    // The four partial products; each fits in 64 bits because both inputs are below 2^32.
    const u64 high_high = a_high * b_high;
    const u64 high_low = a_high * b_low;
    const u64 low_high = a_low * b_high;
    const u64 low_low = a_low * b_low;

    // Sum everything that lands in bits 32..63 of the product; whatever spills past bit 63
    // is the carry into the upper half.
    const u64 middle_sum = (high_low & low_mask) + (low_high & low_mask) + (low_low >> 32);
    const u64 carry = middle_sum >> 32;

    return high_high + (high_low >> 32) + (low_high >> 32) + carry;
#endif
}
} // namespace
namespace Common { namespace Common {
u64 EstimateRDTSCFrequency() { u64 EstimateRDTSCFrequency() {
@ -48,54 +103,71 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
rtsc_frequency_} { rtsc_frequency_} {
_mm_mfence(); _mm_mfence();
last_measure = __rdtsc(); time_point.inner.last_measure = __rdtsc();
accumulated_ticks = 0U; time_point.inner.accumulated_ticks = 0U;
ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency);
us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency);
ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency);
clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency);
cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency);
} }
u64 NativeClock::GetRTSC() { u64 NativeClock::GetRTSC() {
std::scoped_lock scope{rtsc_serialize}; TimePoint new_time_point{};
TimePoint current_time_point{};
do {
current_time_point.pack = time_point.pack;
_mm_mfence(); _mm_mfence();
const u64 current_measure = __rdtsc(); const u64 current_measure = __rdtsc();
u64 diff = current_measure - last_measure; u64 diff = current_measure - current_time_point.inner.last_measure;
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
if (current_measure > last_measure) { new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
last_measure = current_measure; ? current_measure
} : current_time_point.inner.last_measure;
accumulated_ticks += diff; new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack));
/// The clock cannot be more precise than the guest timer, remove the lower bits /// The clock cannot be more precise than the guest timer, remove the lower bits
return accumulated_ticks & inaccuracy_mask; return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
} }
void NativeClock::Pause(bool is_paused) { void NativeClock::Pause(bool is_paused) {
if (!is_paused) { if (!is_paused) {
TimePoint current_time_point{};
TimePoint new_time_point{};
do {
current_time_point.pack = time_point.pack;
new_time_point.pack = current_time_point.pack;
_mm_mfence(); _mm_mfence();
last_measure = __rdtsc(); new_time_point.inner.last_measure = __rdtsc();
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack));
} }
} }
std::chrono::nanoseconds NativeClock::GetTimeNS() { std::chrono::nanoseconds NativeClock::GetTimeNS() {
const u64 rtsc_value = GetRTSC(); const u64 rtsc_value = GetRTSC();
return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)};
} }
std::chrono::microseconds NativeClock::GetTimeUS() { std::chrono::microseconds NativeClock::GetTimeUS() {
const u64 rtsc_value = GetRTSC(); const u64 rtsc_value = GetRTSC();
return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)};
} }
std::chrono::milliseconds NativeClock::GetTimeMS() { std::chrono::milliseconds NativeClock::GetTimeMS() {
const u64 rtsc_value = GetRTSC(); const u64 rtsc_value = GetRTSC();
return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)};
} }
u64 NativeClock::GetClockCycles() { u64 NativeClock::GetClockCycles() {
const u64 rtsc_value = GetRTSC(); const u64 rtsc_value = GetRTSC();
return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); return MultiplyHigh(rtsc_value, clock_rtsc_factor);
} }
u64 NativeClock::GetCPUCycles() { u64 NativeClock::GetCPUCycles() {
const u64 rtsc_value = GetRTSC(); const u64 rtsc_value = GetRTSC();
return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); return MultiplyHigh(rtsc_value, cpu_rtsc_factor);
} }
} // namespace X64 } // namespace X64

View file

@ -6,7 +6,6 @@
#include <optional> #include <optional>
#include "common/spin_lock.h"
#include "common/wall_clock.h" #include "common/wall_clock.h"
namespace Common { namespace Common {
@ -32,14 +31,28 @@ public:
private: private:
u64 GetRTSC(); u64 GetRTSC();
union alignas(16) TimePoint {
TimePoint() : pack{} {}
u128 pack{};
struct Inner {
u64 last_measure{};
u64 accumulated_ticks{};
} inner;
};
/// value used to reduce the native clock's accuracy as some apps rely on /// value used to reduce the native clock's accuracy as some apps rely on
/// undefined behavior where the level of accuracy in the clock shouldn't /// undefined behavior where the level of accuracy in the clock shouldn't
/// be higher. /// be higher.
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
SpinLock rtsc_serialize{}; TimePoint time_point;
u64 last_measure{}; // factors
u64 accumulated_ticks{}; u64 clock_rtsc_factor{};
u64 cpu_rtsc_factor{};
u64 ns_rtsc_factor{};
u64 us_rtsc_factor{};
u64 ms_rtsc_factor{};
u64 rtsc_frequency; u64 rtsc_frequency;
}; };
} // namespace X64 } // namespace X64

View file

@ -181,6 +181,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
if (Settings::values.cpuopt_unsafe_reduce_fp_error) { if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
} }
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
} }
return std::make_unique<Dynarmic::A32::Jit>(config); return std::make_unique<Dynarmic::A32::Jit>(config);

View file

@ -212,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
if (Settings::values.cpuopt_unsafe_reduce_fp_error) { if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
} }
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
} }
return std::make_shared<Dynarmic::A64::Jit>(config); return std::make_shared<Dynarmic::A64::Jit>(config);

View file

@ -73,11 +73,11 @@ enum class MemoryState : u32 {
ThreadLocal = ThreadLocal =
static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted, static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted,
Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc | Transferred = static_cast<u32>(Svc::MemoryState::Transferred) | FlagsMisc |
FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc | FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc |
FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
SharedTransfered = static_cast<u32>(Svc::MemoryState::SharedTransfered) | FlagsMisc | SharedTransferred = static_cast<u32>(Svc::MemoryState::SharedTransferred) | FlagsMisc |
FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped | SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped |
@ -111,8 +111,8 @@ static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09);
static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A); static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A);
static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B); static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B);
static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C); static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C);
static_assert(static_cast<u32>(MemoryState::Transfered) == 0x015C3C0D); static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D);
static_assert(static_cast<u32>(MemoryState::SharedTransfered) == 0x005C380E); static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E);
static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F); static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F);
static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010); static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010);
static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811); static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811);

View file

@ -1007,8 +1007,8 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const {
case MemoryState::Shared: case MemoryState::Shared:
case MemoryState::AliasCode: case MemoryState::AliasCode:
case MemoryState::AliasCodeData: case MemoryState::AliasCodeData:
case MemoryState::Transfered: case MemoryState::Transferred:
case MemoryState::SharedTransfered: case MemoryState::SharedTransferred:
case MemoryState::SharedCode: case MemoryState::SharedCode:
case MemoryState::GeneratedCode: case MemoryState::GeneratedCode:
case MemoryState::CodeOut: case MemoryState::CodeOut:
@ -1042,8 +1042,8 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const {
case MemoryState::Shared: case MemoryState::Shared:
case MemoryState::AliasCode: case MemoryState::AliasCode:
case MemoryState::AliasCodeData: case MemoryState::AliasCodeData:
case MemoryState::Transfered: case MemoryState::Transferred:
case MemoryState::SharedTransfered: case MemoryState::SharedTransferred:
case MemoryState::SharedCode: case MemoryState::SharedCode:
case MemoryState::GeneratedCode: case MemoryState::GeneratedCode:
case MemoryState::CodeOut: case MemoryState::CodeOut:
@ -1080,8 +1080,8 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s
case MemoryState::AliasCodeData: case MemoryState::AliasCodeData:
case MemoryState::Stack: case MemoryState::Stack:
case MemoryState::ThreadLocal: case MemoryState::ThreadLocal:
case MemoryState::Transfered: case MemoryState::Transferred:
case MemoryState::SharedTransfered: case MemoryState::SharedTransferred:
case MemoryState::SharedCode: case MemoryState::SharedCode:
case MemoryState::GeneratedCode: case MemoryState::GeneratedCode:
case MemoryState::CodeOut: case MemoryState::CodeOut:

View file

@ -23,8 +23,8 @@ enum class MemoryState : u32 {
Ipc = 0x0A, Ipc = 0x0A,
Stack = 0x0B, Stack = 0x0B,
ThreadLocal = 0x0C, ThreadLocal = 0x0C,
Transfered = 0x0D, Transferred = 0x0D,
SharedTransfered = 0x0E, SharedTransferred = 0x0E,
SharedCode = 0x0F, SharedCode = 0x0F,
Inaccessible = 0x10, Inaccessible = 0x10,
NonSecureIpc = 0x11, NonSecureIpc = 0x11,

View file

@ -560,14 +560,14 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest
AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) { AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) {
on_new_message = on_new_message =
Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageRecieved"); Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageReceived");
on_operation_mode_changed = on_operation_mode_changed =
Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged"); Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged");
} }
AppletMessageQueue::~AppletMessageQueue() = default; AppletMessageQueue::~AppletMessageQueue() = default;
const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMesssageRecieveEvent() const { const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMessageReceiveEvent() const {
return on_new_message.readable; return on_new_message.readable;
} }
@ -675,7 +675,7 @@ void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2, 1}; IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(RESULT_SUCCESS); rb.Push(RESULT_SUCCESS);
rb.PushCopyObjects(msg_queue->GetMesssageRecieveEvent()); rb.PushCopyObjects(msg_queue->GetMessageReceiveEvent());
} }
void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) { void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) {

View file

@ -55,7 +55,7 @@ public:
explicit AppletMessageQueue(Kernel::KernelCore& kernel); explicit AppletMessageQueue(Kernel::KernelCore& kernel);
~AppletMessageQueue(); ~AppletMessageQueue();
const std::shared_ptr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const; const std::shared_ptr<Kernel::ReadableEvent>& GetMessageReceiveEvent() const;
const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const; const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const;
void PushMessage(AppletMessage msg); void PushMessage(AppletMessage msg);
AppletMessage PopMessage(); AppletMessage PopMessage();

View file

@ -131,6 +131,7 @@ struct Values {
bool cpuopt_unsafe_unfuse_fma; bool cpuopt_unsafe_unfuse_fma;
bool cpuopt_unsafe_reduce_fp_error; bool cpuopt_unsafe_reduce_fp_error;
bool cpuopt_unsafe_inaccurate_nan;
// Renderer // Renderer
Setting<RendererBackend> renderer_backend; Setting<RendererBackend> renderer_backend;
@ -221,7 +222,7 @@ struct Values {
bool disable_macro_jit; bool disable_macro_jit;
bool extended_logging; bool extended_logging;
// Misceallaneous // Miscellaneous
std::string log_filter; std::string log_filter;
bool use_dev_keys; bool use_dev_keys;

View file

@ -120,17 +120,17 @@ private:
/// For use in initialization, querying devices to find the adapter /// For use in initialization, querying devices to find the adapter
void Setup(); void Setup();
/// Resets status of all GC controller devices to a disconected state /// Resets status of all GC controller devices to a disconnected state
void ResetDevices(); void ResetDevices();
/// Resets status of device connected to a disconected state /// Resets status of device connected to a disconnected state
void ResetDevice(std::size_t port); void ResetDevice(std::size_t port);
/// Returns true if we successfully gain access to GC Adapter /// Returns true if we successfully gain access to GC Adapter
bool CheckDeviceAccess(); bool CheckDeviceAccess();
/// Captures GC Adapter endpoint address /// Captures GC Adapter endpoint address
/// Returns true if the endpoind was set correctly /// Returns true if the endpoint was set correctly
bool GetGCEndpoint(libusb_device* device); bool GetGCEndpoint(libusb_device* device);
/// For shutting down, clear all data, join all threads, release usb /// For shutting down, clear all data, join all threads, release usb

View file

@ -129,7 +129,7 @@ void MotionInput::UpdateOrientation(u64 elapsed_time) {
rad_gyro += ki * integral_error; rad_gyro += ki * integral_error;
rad_gyro += kd * derivative_error; rad_gyro += kd * derivative_error;
} else { } else {
// Give more weight to acelerometer values to compensate for the lack of gyro // Give more weight to accelerometer values to compensate for the lack of gyro
rad_gyro += 35.0f * kp * real_error; rad_gyro += 35.0f * kp * real_error;
rad_gyro += 10.0f * ki * integral_error; rad_gyro += 10.0f * ki * integral_error;
rad_gyro += 10.0f * kd * derivative_error; rad_gyro += 10.0f * kd * derivative_error;

View file

@ -20,7 +20,7 @@ enum class MouseButton {
Left, Left,
Wheel, Wheel,
Right, Right,
Foward, Forward,
Backward, Backward,
Undefined, Undefined,
}; };

View file

@ -28,14 +28,14 @@ private:
mutable std::mutex mutex; mutable std::mutex mutex;
}; };
/// A motion device factory that creates motion devices from JC Adapter /// A motion device factory that creates motion devices from a UDP client
UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_) UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_)
: client(std::move(client_)) {} : client(std::move(client_)) {}
/** /**
* Creates motion device * Creates motion device
* @param params contains parameters for creating the device: * @param params contains parameters for creating the device:
* - "port": the nth jcpad on the adapter * - "port": the UDP port number
*/ */
std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) { std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) {
auto ip = params.Get("ip", "127.0.0.1"); auto ip = params.Get("ip", "127.0.0.1");
@ -90,14 +90,14 @@ private:
mutable std::mutex mutex; mutable std::mutex mutex;
}; };
/// A motion device factory that creates motion devices from JC Adapter /// A motion device factory that creates motion devices from a UDP client
UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_) UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_)
: client(std::move(client_)) {} : client(std::move(client_)) {}
/** /**
* Creates motion device * Creates motion device
* @param params contains parameters for creating the device: * @param params contains parameters for creating the device:
* - "port": the nth jcpad on the adapter * - "port": the UDP port number
*/ */
std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) { std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) {
auto ip = params.Get("ip", "127.0.0.1"); auto ip = params.Get("ip", "127.0.0.1");

View file

@ -207,7 +207,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
} }
/** This test checks for fiber thread exchange configuration and validates that fibers are /** This test checks for fiber thread exchange configuration and validates that fibers are
* that a fiber has been succesfully transfered from one thread to another and that the TLS * that a fiber has been successfully transferred from one thread to another and that the TLS
* region of the thread is kept while changing fibers. * region of the thread is kept while changing fibers.
*/ */
TEST_CASE("Fibers::InterExchange", "[common]") { TEST_CASE("Fibers::InterExchange", "[common]") {
@ -299,7 +299,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
} }
/** This test checks for one two threads racing for starting the same fiber. /** This test checks for one two threads racing for starting the same fiber.
* It checks execution occured in an ordered manner and by no time there were * It checks execution occurred in an ordered manner and by no time there were
* two contexts at the same time. * two contexts at the same time.
*/ */
TEST_CASE("Fibers::StartRace", "[common]") { TEST_CASE("Fibers::StartRace", "[common]") {

View file

@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
void Vic::Execute() { void Vic::Execute() {
if (output_surface_luma_address == 0) { if (output_surface_luma_address == 0) {
LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
vic_state.output_surface.luma_offset); vic_state.output_surface.luma_offset);
return; return;
} }

View file

@ -491,7 +491,7 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
} }
void VKDevice::ReportLoss() const { void VKDevice::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occured!"); LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
// Wait for the log to flush and for Nsight Aftermath to dump the results // Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{15}); std::this_thread::sleep_for(std::chrono::seconds{15});

View file

@ -19,7 +19,7 @@ QtErrorDisplay::~QtErrorDisplay() = default;
void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const { void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const {
callback = std::move(finished); callback = std::move(finished);
emit MainWindowDisplayError( emit MainWindowDisplayError(
tr("An error has occured.\nPlease try again or contact the developer of the " tr("An error has occurred.\nPlease try again or contact the developer of the "
"software.\n\nError Code: %1-%2 (0x%3)") "software.\n\nError Code: %1-%2 (0x%3)")
.arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
.arg(error.description, 4, 10, QChar::fromLatin1('0')) .arg(error.description, 4, 10, QChar::fromLatin1('0'))
@ -32,7 +32,7 @@ void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::secon
const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count()); const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count());
emit MainWindowDisplayError( emit MainWindowDisplayError(
tr("An error occured on %1 at %2.\nPlease try again or contact the " tr("An error occurred on %1 at %2.\nPlease try again or contact the "
"developer of the software.\n\nError Code: %3-%4 (0x%5)") "developer of the software.\n\nError Code: %3-%4 (0x%5)")
.arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy"))) .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy")))
.arg(date_time.toString(QStringLiteral("h:mm:ss A"))) .arg(date_time.toString(QStringLiteral("h:mm:ss A")))
@ -46,7 +46,7 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te
std::function<void()> finished) const { std::function<void()> finished) const {
callback = std::move(finished); callback = std::move(finished);
emit MainWindowDisplayError( emit MainWindowDisplayError(
tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") tr("An error has occurred.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5")
.arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
.arg(error.description, 4, 10, QChar::fromLatin1('0')) .arg(error.description, 4, 10, QChar::fromLatin1('0'))
.arg(error.raw, 8, 16, QChar::fromLatin1('0')) .arg(error.raw, 8, 16, QChar::fromLatin1('0'))

View file

@ -72,7 +72,7 @@ void CompatDB::Submit() {
void CompatDB::OnTestcaseSubmitted() { void CompatDB::OnTestcaseSubmitted() {
if (!testcase_watcher.result()) { if (!testcase_watcher.result()) {
QMessageBox::critical(this, tr("Communication error"), QMessageBox::critical(this, tr("Communication error"),
tr("An error occured while sending the Testcase")); tr("An error occurred while sending the Testcase"));
button(NextButton)->setEnabled(true); button(NextButton)->setEnabled(true);
button(NextButton)->setText(tr("Next")); button(NextButton)->setText(tr("Next"));
button(CancelButton)->setVisible(true); button(CancelButton)->setVisible(true);

View file

@ -764,6 +764,8 @@ void Config::ReadCpuValues() {
ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool(); ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool();
Settings::values.cpuopt_unsafe_reduce_fp_error = Settings::values.cpuopt_unsafe_reduce_fp_error =
ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool(); ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool();
Settings::values.cpuopt_unsafe_inaccurate_nan =
ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool();
} }
qt_config->endGroup(); qt_config->endGroup();
@ -1327,6 +1329,8 @@ void Config::SaveCpuValues() {
Settings::values.cpuopt_unsafe_unfuse_fma, true); Settings::values.cpuopt_unsafe_unfuse_fma, true);
WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"),
Settings::values.cpuopt_unsafe_reduce_fp_error, true); Settings::values.cpuopt_unsafe_reduce_fp_error, true);
WriteSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
Settings::values.cpuopt_unsafe_inaccurate_nan, true);
} }
qt_config->endGroup(); qt_config->endGroup();

View file

@ -36,6 +36,8 @@ void ConfigureCpu::SetConfiguration() {
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma);
ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error); ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error);
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan);
} }
void ConfigureCpu::AccuracyUpdated(int index) { void ConfigureCpu::AccuracyUpdated(int index) {
@ -61,6 +63,7 @@ void ConfigureCpu::ApplyConfiguration() {
static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex()); static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex());
Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked(); Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked();
Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked(); Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked();
Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked();
} }
void ConfigureCpu::changeEvent(QEvent* event) { void ConfigureCpu::changeEvent(QEvent* event) {

View file

@ -109,6 +109,18 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan">
<property name="text">
<string>Inaccurate NaN handling</string>
</property>
<property name="toolTip">
<string>
&lt;div&gt;This option improves speed by removing NaN checking. Please note this also reduces accuracy of certain floating-point instructions.&lt;/div&gt;
</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

View file

@ -142,7 +142,7 @@ constexpr int default_mouse_timeout = 2500;
/** /**
* "Callouts" are one-time instructional messages shown to the user. In the config settings, there * "Callouts" are one-time instructional messages shown to the user. In the config settings, there
* is a bitfield "callout_flags" options, used to track if a message has already been shown to the * is a bitfield "callout_flags" options, used to track if a message has already been shown to the
* user. This is 32-bits - if we have more than 32 callouts, we should retire and recyle old ones. * user. This is 32-bits - if we have more than 32 callouts, we should retire and recycle old ones.
*/ */
enum class CalloutFlag : uint32_t { enum class CalloutFlag : uint32_t {
Telemetry = 0x1, Telemetry = 0x1,

View file

@ -202,7 +202,7 @@ int main(int argc, char** argv) {
const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
const u16 error_id = static_cast<u16>(load_result) - loader_id; const u16 error_id = static_cast<u16>(load_result) - loader_id;
LOG_CRITICAL(Frontend, LOG_CRITICAL(Frontend,
"While attempting to load the ROM requested, an error occured. Please " "While attempting to load the ROM requested, an error occurred. Please "
"refer to the yuzu wiki for more information or the yuzu discord for " "refer to the yuzu wiki for more information or the yuzu discord for "
"additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}",
loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); loader_id, error_id, static_cast<Loader::ResultStatus>(error_id));

View file

@ -242,7 +242,7 @@ int main(int argc, char** argv) {
const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
const u16 error_id = static_cast<u16>(load_result) - loader_id; const u16 error_id = static_cast<u16>(load_result) - loader_id;
LOG_CRITICAL(Frontend, LOG_CRITICAL(Frontend,
"While attempting to load the ROM requested, an error occured. Please " "While attempting to load the ROM requested, an error occurred. Please "
"refer to the yuzu wiki for more information or the yuzu discord for " "refer to the yuzu wiki for more information or the yuzu discord for "
"additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}",
loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); loader_id, error_id, static_cast<Loader::ResultStatus>(error_id));