early-access version 1279

This commit is contained in:
parent 84d5e05316
commit f7b2c59575

35 changed files with 399 additions and 112 deletions
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============

-This is the source code for early-access 1277.
+This is the source code for early-access 1279.

 ## Legal Notice

@@ -39,6 +39,9 @@ enum class OptimizationFlag : std::uint32_t {
     /// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
     /// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
     Unsafe_ReducedErrorFP = 0x00020000,
+    /// This is an UNSAFE optimization that causes floating-point instructions to not produce correct NaNs.
+    /// This may also result in inaccurate results when instructions are given certain special values.
+    Unsafe_InaccurateNaN = 0x00040000,
 };

 constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
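The new flag is consumed the same way as the existing unsafe flags. A minimal sketch of opting in from an embedder (the `A64::UserConfig` spelling is assumed here; yuzu's actual plumbing appears later in this commit):

```cpp
// Minimal sketch, assuming Dynarmic's A64 UserConfig. Unsafe_InaccurateNaN
// trades architecturally correct NaN results for speed, so it is opt-in only.
Dynarmic::A64::UserConfig config;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
```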
@@ -1080,29 +1080,40 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     auto& carry_in = args[2];
+    const bool is_cmp = inst->UseCount() == size_t(!!carry_inst + !!overflow_inst + !!nzcv_inst) && carry_in.IsImmediate() && carry_in.GetImmediateU1();

     const Xbyak::Reg64 nzcv = DoNZCV(code, ctx.reg_alloc, nzcv_inst);
-    const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bitsize);
+    const Xbyak::Reg result = (is_cmp ? ctx.reg_alloc.UseGpr(args[0]) : ctx.reg_alloc.UseScratchGpr(args[0])).changeBit(bitsize);
     const Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst);
     const Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1};

     // TODO: Consider using LEA.
-    // TODO: Optimize CMP case.
     // Note that x64 CF is inverse of what the ARM carry flag is here.

+    bool invert_output_carry = true;
+
+    if (is_cmp) {
         if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) {
+            const u32 op_arg = args[1].GetImmediateU32();
+            code.cmp(result, op_arg);
+        } else {
+            OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
+            op_arg.setBit(bitsize);
+            code.cmp(result, *op_arg);
+        }
+    } else if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) {
         const u32 op_arg = args[1].GetImmediateU32();
         if (carry_in.IsImmediate()) {
             if (carry_in.GetImmediateU1()) {
                 code.sub(result, op_arg);
             } else {
-                code.stc();
-                code.sbb(result, op_arg);
+                code.add(result, ~op_arg);
+                invert_output_carry = false;
             }
         } else {
             code.bt(carry.cvt32(), 0);
-            code.cmc();
-            code.sbb(result, op_arg);
+            code.adc(result, ~op_arg);
+            invert_output_carry = false;
         }
     } else {
         OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]);
@@ -1122,14 +1133,20 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit
     }

     if (nzcv_inst) {
+        if (invert_output_carry) {
             code.cmc();
+        }
         code.lahf();
         code.seto(code.al);
         ctx.reg_alloc.DefineValue(nzcv_inst, nzcv);
         ctx.EraseInstruction(nzcv_inst);
     }
     if (carry_inst) {
+        if (invert_output_carry) {
             code.setnc(carry);
+        } else {
+            code.setc(carry);
+        }
         ctx.reg_alloc.DefineValue(carry_inst, carry);
         ctx.EraseInstruction(carry_inst);
     }
@@ -1138,8 +1155,9 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit
         ctx.reg_alloc.DefineValue(overflow_inst, overflow);
         ctx.EraseInstruction(overflow_inst);
     }
+    if (!is_cmp) {
         ctx.reg_alloc.DefineValue(inst, result);
+    }
 }

 void EmitX64::EmitSub32(EmitContext& ctx, IR::Inst* inst) {
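The new `invert_output_carry` paths lean on a two's-complement identity: an ARM-style subtract-with-carry equals an add of the complemented operand, and after `add`/`adc` with `~op_arg` the x86 CF already equals the ARM carry, so the `cmc`/`setnc` inversion can be skipped. A standalone check of the identity (illustrative only, not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

int main() {
    // ARM: result = a - b - (1 - carry); equivalently a + ~b + carry (mod 2^32),
    // since ~b == -b - 1. Emitting add/adc with ~b makes x86 CF equal the ARM
    // carry directly, which is why invert_output_carry becomes false there.
    const std::uint32_t a = 0x80000000u, b = 0x7FFFFFFFu;
    for (std::uint32_t carry = 0; carry <= 1; ++carry) {
        assert(a + ~b + carry == a - b - (1u - carry));
    }
}
```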
@@ -257,7 +257,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {

     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);

-    if (!ctx.FPCR().DN()) {
+    if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
         end = ProcessNaN<fsize>(code, result);
     }
     if constexpr (std::is_member_function_pointer_v<Function>) {
@@ -265,7 +265,9 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     } else {
         fn(result);
     }
-    if (ctx.FPCR().DN()) {
+    if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+        // Do nothing
+    } else if (ctx.FPCR().DN()) {
         ForceToDefaultNaN<fsize>(code, result);
     } else {
         PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
@@ -281,6 +283,20 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);

+    if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+        const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
+
+        if constexpr (std::is_member_function_pointer_v<Function>) {
+            (code.*fn)(result, operand);
+        } else {
+            fn(result, operand);
+        }
+
+        ctx.reg_alloc.DefineValue(inst, result);
+        return;
+    }
+
     if (ctx.FPCR().DN()) {
         const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
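What this skips: `ProcessNaN`/`ForceToDefaultNaN` exist because x86 SSE propagates a source NaN (quieted, payload and sign intact) where ARM with FPCR.DN set substitutes the default NaN. A hedged host-side illustration of the divergence the flag accepts (behavior shown is for x86 without constant folding, e.g. at -O0):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    // A quiet NaN with a sign bit and an extra payload bit set.
    std::uint32_t bits = 0xFFC00001u;
    float x;
    std::memcpy(&x, &bits, sizeof(x));
    const float y = x * 1.0f; // SSE keeps the NaN operand; ARM DN would give 0x7FC00000
    std::uint32_t out;
    std::memcpy(&out, &y, sizeof(out));
    std::printf("result bits: %08X\n", out); // FFC00001 on x86, not the default NaN
}
```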
@@ -590,9 +606,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     using FPT = mp::unsigned_integer_of_size<fsize>;

     if constexpr (fsize != 16) {
-        if (code.HasFMA()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);

+        if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+            const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
+
+            FCODE(vfmadd231s)(result, operand2, operand3);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+
+        if (code.HasFMA()) {
             Xbyak::Label end, fallback;

             const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
@@ -641,8 +668,6 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     }

     if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
         const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
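For contrast with the fused fast path above, the `Unsafe_UnfuseFMA` fallback rounds twice, and that is observable. A small demonstration (illustrative, not from the commit; compile with contraction disabled, e.g. `-ffp-contract=off`, to see it reliably):

```cpp
#include <cmath>
#include <cstdio>

int main() {
    // fma(a, b, c) rounds once; a * b + c rounds the product first.
    // Here a * b == 1 - 2^-104 exactly, which rounds to 1.0 as a double,
    // so the unfused form loses the entire answer.
    const double a = 1.0 + 0x1p-52, b = 1.0 - 0x1p-52, c = -1.0;
    std::printf("fused:   %a\n", std::fma(a, b, c)); // -0x1p-104
    std::printf("unfused: %a\n", a * b + c);         // 0x0p+0
}
```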
@@ -810,6 +835,22 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
     using FPT = mp::unsigned_integer_of_size<fsize>;

     if constexpr (fsize != 16) {
+        if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+            Xbyak::Label end, fallback;
+
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+
+            code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
+            FCODE(vfnmadd231s)(result, operand1, operand2);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+
         if (code.HasFMA()) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);

@@ -998,6 +1039,21 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
     using FPT = mp::unsigned_integer_of_size<fsize>;

     if constexpr (fsize != 16) {
+        if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+
+            code.vmovaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>()));
+            FCODE(vfnmadd231s)(result, operand1, operand2);
+            FCODE(vmuls)(result, result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>()));
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+
         if (code.HasFMA() && code.HasAVX()) {
             auto args = ctx.reg_alloc.GetArgumentInfo(inst);

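The constants appear to encode the ARM step operations directly: `FPValue<FPT, false, 0, 2>` is 2.0 and `vfnmadd231s` computes `result - a*b`, giving FRECPS's `2 - a*b`; the rsqrt path loads 3.0 and scales by `FPValue<FPT, false, -1, 1>` = 0.5, giving FRSQRTS's `(3 - a*b) / 2`. These are Newton–Raphson refinement steps; a scalar sketch of what guest code builds with them:

```cpp
// Sketch of the iterations guests implement with FRECPE/FRECPS and
// FRSQRTE/FRSQRTS; r is the current estimate being refined.
double recip_refine(double x, double r) {
    return r * (2.0 - x * r); // FRECPS step: converges r -> 1/x
}

double rsqrt_refine(double x, double r) {
    return r * ((3.0 - x * r * r) / 2.0); // FRSQRTS step: converges r -> 1/sqrt(x)
}
```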
@@ -290,7 +290,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();

-    if (ctx.FPCR(fpcr_controlled).DN()) {
+    if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
         Xbyak::Xmm result;

         if constexpr (std::is_member_function_pointer_v<Function>) {
@@ -306,7 +306,9 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
             });
         }

+        if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
             ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
+        }

         ctx.reg_alloc.DefineValue(inst, result);
         return;
@@ -342,7 +344,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const bool fpcr_controlled = args[2].GetImmediateU1();

-    if (ctx.FPCR(fpcr_controlled).DN()) {
+    if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
         const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);

@@ -356,7 +358,9 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
             });
         }

+        if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
             ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
+        }

         ctx.reg_alloc.DefineValue(inst, xmm_a);
         return;
@@ -985,11 +989,23 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     };

     if constexpr (fsize != 16) {
-        if (code.HasFMA() && code.HasAVX()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);

         const bool fpcr_controlled = args[3].GetImmediateU1();

+        if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
+            const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+            const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+            const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
+
+            MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+                FCODE(vfmadd231p)(result, xmm_b, xmm_c);
+            });
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+
+        if (code.HasFMA() && code.HasAVX()) {
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
             const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
@@ -1025,8 +1041,6 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     }

     if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
         const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
@@ -1233,10 +1247,24 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     };

     if constexpr (fsize != 16) {
-        if (code.HasFMA() && code.HasAVX()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const bool fpcr_controlled = args[2].GetImmediateU1();

+        if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+
+            MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+                code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
+                FCODE(vfnmadd231p)(result, operand1, operand2);
+            });
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+
+        if (code.HasFMA() && code.HasAVX()) {
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
             const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@@ -1269,8 +1297,6 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     }

     if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
         const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -1428,10 +1454,25 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     };

     if constexpr (fsize != 16) {
-        if (code.HasFMA() && code.HasAVX()) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
         const bool fpcr_controlled = args[2].GetImmediateU1();

+        if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+
+            MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&]{
+                code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
+                FCODE(vfnmadd231p)(result, operand1, operand2);
+                FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
+            });
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+
+        if (code.HasFMA() && code.HasAVX()) {
             const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
             const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
             const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
@@ -1470,8 +1511,6 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
     }

     if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
         const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
         const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
@@ -27,7 +27,7 @@ bool TranslatorVisitor::CCMP_reg(bool sf, Reg Rm, Cond cond, Reg Rn, Imm<4> nzcv
     const IR::U32U64 operand1 = X(datasize, Rn);
     const IR::U32U64 operand2 = X(datasize, Rm);

-    const IR::NZCV then_flags = ir.NZCVFrom(ir.AddWithCarry(operand1, ir.Not(operand2), ir.Imm1(1)));
+    const IR::NZCV then_flags = ir.NZCVFrom(ir.SubWithCarry(operand1, operand2, ir.Imm1(1)));
     const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags));
     ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags));
     return true;
@@ -53,7 +53,7 @@ bool TranslatorVisitor::CCMP_imm(bool sf, Imm<5> imm5, Cond cond, Reg Rn, Imm<4>
     const IR::U32U64 operand1 = X(datasize, Rn);
     const IR::U32U64 operand2 = I(datasize, imm5.ZeroExtend<u32>());

-    const IR::NZCV then_flags = ir.NZCVFrom(ir.AddWithCarry(operand1, ir.Not(operand2), ir.Imm1(1)));
+    const IR::NZCV then_flags = ir.NZCVFrom(ir.SubWithCarry(operand1, operand2, ir.Imm1(1)));
     const IR::NZCV else_flags = ir.NZCVFromPackedFlags(ir.Imm32(flags));
     ir.SetNZCV(ir.ConditionalSelect(cond, then_flags, else_flags));
     return true;
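Both forms are equivalent by the ARM pseudocode identity `SUBS(a, b) == AddWithCarry(a, NOT(b), '1')`; expressing the comparison through the dedicated `SubWithCarry` IR opcode is what lets the x64 backend recognize the flags-only case and emit the bare `cmp` added earlier in this commit. A one-line check of the value identity (illustrative only):

```cpp
#include <cassert>
#include <cstdint>

int main() {
    const std::uint64_t a = 42, b = 7;
    assert(a + ~b + 1 == a - b); // AddWithCarry(a, NOT(b), 1) == a - b
}
```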
@@ -98,7 +98,6 @@ add_library(common STATIC
     algorithm.h
     alignment.h
     assert.h
-    atomic_ops.cpp
     atomic_ops.h
     detached_tasks.cpp
     detached_tasks.h
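With the compare-and-swap helpers now defined inline in `atomic_ops.h` (next hunk), the separate `atomic_ops.cpp` translation unit has nothing left to compile; presumably this also lets hot callers such as `NativeClock::GetRTSC` below inline the CAS rather than pay a function call per clock query.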
@@ -4,14 +4,75 @@

 #pragma once

+#include <cstring>
+#include <memory>
+
 #include "common/common_types.h"

+#if _MSC_VER
+#include <intrin.h>
+#endif
+
 namespace Common {

-[[nodiscard]] bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected);
-[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected);
+#if _MSC_VER
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
+    const u8 result =
+        _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
+    const u16 result =
+        _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
+    const u32 result =
+        _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
+    const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
+                                                     value, expected);
+    return result == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
+    return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
+                                          value[0],
+                                          reinterpret_cast<__int64*>(expected.data())) != 0;
+}
+
+#else
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
+    return __sync_bool_compare_and_swap(pointer, expected, value);
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+}
+
+#endif
+
 } // namespace Common
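A hedged usage sketch of the new header-only CAS (the `AtomicAdd` helper is hypothetical, not part of the commit); this is the same retry shape `NativeClock::GetRTSC` adopts below:

```cpp
#include "common/atomic_ops.h"

// Hypothetical helper: lock-free add built on AtomicCompareAndSwap.
void AtomicAdd(volatile u64* counter, u64 amount) {
    u64 expected;
    do {
        expected = *counter; // snapshot the current value
        // Retry until no other thread changed *counter since the snapshot.
    } while (!Common::AtomicCompareAndSwap(counter, expected + amount, expected));
}
```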
@@ -90,7 +90,7 @@ struct PageTable {
     PageTable& operator=(PageTable&&) noexcept = default;

     /**
-     * Resizes the page table to be able to accomodate enough pages within
+     * Resizes the page table to be able to accommodate enough pages within
      * a given address space.
      *
      * @param address_space_width_in_bits The address size width in bits.
@@ -394,7 +394,7 @@ public:
     template <typename S, typename T2, typename F2>
     friend S operator%(const S& p, const swapped_t v);

-    // Arithmetics + assignements
+    // Arithmetics + assignments
     template <typename S, typename T2, typename F2>
     friend S operator+=(const S& p, const swapped_t v);

@@ -451,7 +451,7 @@ S operator%(const S& i, const swap_struct_t<T, F> v) {
     return i % v.swap();
 }

-// Arithmetics + assignements
+// Arithmetics + assignments
 template <typename S, typename T, typename F>
 S& operator+=(S& i, const swap_struct_t<T, F> v) {
     i += v.swap();
@@ -2,19 +2,74 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <array>
 #include <chrono>
+#include <limits>
 #include <mutex>
 #include <thread>

 #ifdef _MSC_VER
 #include <intrin.h>
+
+#pragma intrinsic(__umulh)
+#pragma intrinsic(_udiv128)
 #else
 #include <x86intrin.h>
 #endif

+#include "common/atomic_ops.h"
 #include "common/uint128.h"
 #include "common/x64/native_clock.h"

+namespace {
+
+[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
+#ifdef __SIZEOF_INT128__
+    const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
+    return static_cast<u64>(base / divisor);
+#elif defined(_M_X64) || defined(_M_ARM64)
+    std::array<u64, 2> r = {0, numerator};
+    u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], divisor, &remainder);
+#else
+    return _udiv128(r[1], r[0], divisor, &remainder);
+#endif
+#else
+    // This one is a bit more inaccurate.
+    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
+#endif
+}
+
+[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
+#ifdef __SIZEOF_INT128__
+    return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
+#elif defined(_M_X64) || defined(_M_ARM64)
+    return __umulh(a, b); // MSVC
+#else
+    // Generic fallback
+    const u64 a_lo = u32(a);
+    const u64 a_hi = a >> 32;
+    const u64 b_lo = u32(b);
+    const u64 b_hi = b >> 32;
+
+    const u64 a_x_b_hi = a_hi * b_hi;
+    const u64 a_x_b_mid = a_hi * b_lo;
+    const u64 b_x_a_mid = b_hi * a_lo;
+    const u64 a_x_b_lo = a_lo * b_lo;
+
+    const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
+                           static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
+                          32;
+
+    const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
+
+    return multhi;
+#endif
+}
+
+} // namespace
+
 namespace Common {

 u64 EstimateRDTSCFrequency() {
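Together these implement a 0.64 fixed-point division: `GetFixedPoint64Factor` computes `(numerator << 64) / divisor` once, and `MultiplyHigh` then turns every `ticks * numerator / divisor` into a single high multiply. A worked example with assumed numbers:

```cpp
// Assume a 3 GHz invariant TSC and a nanosecond target:
//   factor = (1'000'000'000 << 64) / 3'000'000'000
//          = 2^64 / 3 ~= 0x5555'5555'5555'5555
// Then for any tick count t:
//   MultiplyHigh(t, factor) = (t * (2^64 / 3)) >> 64 ~= t / 3   (nanoseconds)
// i.e. one multiply per query instead of a 128-bit multiply-then-divide.
```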
@@ -48,54 +103,71 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
     : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
                                                                                rtsc_frequency_} {
     _mm_mfence();
-    last_measure = __rdtsc();
-    accumulated_ticks = 0U;
+    time_point.inner.last_measure = __rdtsc();
+    time_point.inner.accumulated_ticks = 0U;
+    ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency);
+    us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency);
+    ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency);
+    clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency);
+    cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency);
 }

 u64 NativeClock::GetRTSC() {
-    std::scoped_lock scope{rtsc_serialize};
-    _mm_mfence();
-    const u64 current_measure = __rdtsc();
-    u64 diff = current_measure - last_measure;
-    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
-    if (current_measure > last_measure) {
-        last_measure = current_measure;
-    }
-    accumulated_ticks += diff;
+    TimePoint new_time_point{};
+    TimePoint current_time_point{};
+    do {
+        current_time_point.pack = time_point.pack;
+        _mm_mfence();
+        const u64 current_measure = __rdtsc();
+        u64 diff = current_measure - current_time_point.inner.last_measure;
+        diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
+        new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
+                                                ? current_measure
+                                                : current_time_point.inner.last_measure;
+        new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
+    } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
+                                           current_time_point.pack));
     /// The clock cannot be more precise than the guest timer, remove the lower bits
-    return accumulated_ticks & inaccuracy_mask;
+    return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
 }

 void NativeClock::Pause(bool is_paused) {
     if (!is_paused) {
+        TimePoint current_time_point{};
+        TimePoint new_time_point{};
+        do {
+            current_time_point.pack = time_point.pack;
+            new_time_point.pack = current_time_point.pack;
+            _mm_mfence();
-            last_measure = __rdtsc();
+            new_time_point.inner.last_measure = __rdtsc();
+        } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
+                                               current_time_point.pack));
     }
 }

 std::chrono::nanoseconds NativeClock::GetTimeNS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
+    return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)};
 }

 std::chrono::microseconds NativeClock::GetTimeUS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
+    return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)};
 }

 std::chrono::milliseconds NativeClock::GetTimeMS() {
     const u64 rtsc_value = GetRTSC();
-    return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
+    return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)};
 }

 u64 NativeClock::GetClockCycles() {
     const u64 rtsc_value = GetRTSC();
-    return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
+    return MultiplyHigh(rtsc_value, clock_rtsc_factor);
 }

 u64 NativeClock::GetCPUCycles() {
     const u64 rtsc_value = GetRTSC();
-    return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
+    return MultiplyHigh(rtsc_value, cpu_rtsc_factor);
 }

 } // namespace X64
@@ -6,7 +6,6 @@

 #include <optional>

-#include "common/spin_lock.h"
 #include "common/wall_clock.h"

 namespace Common {
@@ -32,14 +31,28 @@ public:
 private:
     u64 GetRTSC();

+    union alignas(16) TimePoint {
+        TimePoint() : pack{} {}
+        u128 pack{};
+        struct Inner {
+            u64 last_measure{};
+            u64 accumulated_ticks{};
+        } inner;
+    };
+
     /// value used to reduce the native clocks accuracy as some apps rely on
     /// undefined behavior where the level of accuracy in the clock shouldn't
     /// be higher.
     static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);

-    SpinLock rtsc_serialize{};
-    u64 last_measure{};
-    u64 accumulated_ticks{};
+    TimePoint time_point;
+    // factors
+    u64 clock_rtsc_factor{};
+    u64 cpu_rtsc_factor{};
+    u64 ns_rtsc_factor{};
+    u64 us_rtsc_factor{};
+    u64 ms_rtsc_factor{};

     u64 rtsc_frequency;
 };
 } // namespace X64
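The union is what makes the lock-free `GetRTSC` loop possible: `last_measure` and `accumulated_ticks` are read and replaced as one 128-bit `pack`, so a torn update can never pair an old measure with a new tick count. `alignas(16)` is needed because the 128-bit CAS (`cmpxchg16b`, reached via `_InterlockedCompareExchange128` above) requires 16-byte-aligned operands. A sketch of the update pattern both `GetRTSC` and `Pause` follow:

```cpp
// Sketch only; mirrors the loops in native_clock.cpp above.
TimePoint current{}, next{};
do {
    current.pack = time_point.pack;       // snapshot both u64 fields at once
    next.inner.last_measure = __rdtsc();  // compute the replacement locally
    next.inner.accumulated_ticks = current.inner.accumulated_ticks;
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), next.pack, current.pack));
```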
@@ -181,6 +181,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
     if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
         config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
     }
+    if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
+        config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+    }
 }

 return std::make_unique<Dynarmic::A32::Jit>(config);
@@ -212,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
         config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
     }
+    if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
+        config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+    }
 }

 return std::make_shared<Dynarmic::A64::Jit>(config);
@@ -73,11 +73,11 @@ enum class MemoryState : u32 {
     ThreadLocal =
         static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted,

-    Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc |
+    Transferred = static_cast<u32>(Svc::MemoryState::Transferred) | FlagsMisc |
                  FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc |
                  FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,

-    SharedTransfered = static_cast<u32>(Svc::MemoryState::SharedTransfered) | FlagsMisc |
+    SharedTransferred = static_cast<u32>(Svc::MemoryState::SharedTransferred) | FlagsMisc |
                        FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,

     SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped |
@@ -111,8 +111,8 @@ static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09);
 static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A);
 static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B);
 static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C);
-static_assert(static_cast<u32>(MemoryState::Transfered) == 0x015C3C0D);
-static_assert(static_cast<u32>(MemoryState::SharedTransfered) == 0x005C380E);
+static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D);
+static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E);
 static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F);
 static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010);
 static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811);
@@ -1007,8 +1007,8 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const {
     case MemoryState::Shared:
     case MemoryState::AliasCode:
     case MemoryState::AliasCodeData:
-    case MemoryState::Transfered:
-    case MemoryState::SharedTransfered:
+    case MemoryState::Transferred:
+    case MemoryState::SharedTransferred:
     case MemoryState::SharedCode:
     case MemoryState::GeneratedCode:
     case MemoryState::CodeOut:
@@ -1042,8 +1042,8 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const {
     case MemoryState::Shared:
     case MemoryState::AliasCode:
     case MemoryState::AliasCodeData:
-    case MemoryState::Transfered:
-    case MemoryState::SharedTransfered:
+    case MemoryState::Transferred:
+    case MemoryState::SharedTransferred:
     case MemoryState::SharedCode:
     case MemoryState::GeneratedCode:
     case MemoryState::CodeOut:
@@ -1080,8 +1080,8 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s
     case MemoryState::AliasCodeData:
     case MemoryState::Stack:
     case MemoryState::ThreadLocal:
-    case MemoryState::Transfered:
-    case MemoryState::SharedTransfered:
+    case MemoryState::Transferred:
+    case MemoryState::SharedTransferred:
     case MemoryState::SharedCode:
     case MemoryState::GeneratedCode:
     case MemoryState::CodeOut:
@@ -23,8 +23,8 @@ enum class MemoryState : u32 {
     Ipc = 0x0A,
     Stack = 0x0B,
     ThreadLocal = 0x0C,
-    Transfered = 0x0D,
-    SharedTransfered = 0x0E,
+    Transferred = 0x0D,
+    SharedTransferred = 0x0E,
     SharedCode = 0x0F,
     Inaccessible = 0x10,
     NonSecureIpc = 0x11,
@@ -560,14 +560,14 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest

 AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) {
     on_new_message =
-        Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageRecieved");
+        Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageReceived");
     on_operation_mode_changed =
         Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged");
 }

 AppletMessageQueue::~AppletMessageQueue() = default;

-const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMesssageRecieveEvent() const {
+const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMessageReceiveEvent() const {
     return on_new_message.readable;
 }

@@ -675,7 +675,7 @@ void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) {

     IPC::ResponseBuilder rb{ctx, 2, 1};
     rb.Push(RESULT_SUCCESS);
-    rb.PushCopyObjects(msg_queue->GetMesssageRecieveEvent());
+    rb.PushCopyObjects(msg_queue->GetMessageReceiveEvent());
 }

 void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) {
@@ -55,7 +55,7 @@ public:
     explicit AppletMessageQueue(Kernel::KernelCore& kernel);
     ~AppletMessageQueue();

-    const std::shared_ptr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const;
+    const std::shared_ptr<Kernel::ReadableEvent>& GetMessageReceiveEvent() const;
     const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const;
     void PushMessage(AppletMessage msg);
     AppletMessage PopMessage();
@@ -131,6 +131,7 @@ struct Values {

     bool cpuopt_unsafe_unfuse_fma;
     bool cpuopt_unsafe_reduce_fp_error;
+    bool cpuopt_unsafe_inaccurate_nan;

     // Renderer
     Setting<RendererBackend> renderer_backend;
@@ -221,7 +222,7 @@ struct Values {
     bool disable_macro_jit;
     bool extended_logging;

-    // Misceallaneous
+    // Miscellaneous
     std::string log_filter;
     bool use_dev_keys;
@@ -120,17 +120,17 @@ private:
     /// For use in initialization, querying devices to find the adapter
     void Setup();

-    /// Resets status of all GC controller devices to a disconected state
+    /// Resets status of all GC controller devices to a disconnected state
     void ResetDevices();

-    /// Resets status of device connected to a disconected state
+    /// Resets status of device connected to a disconnected state
     void ResetDevice(std::size_t port);

     /// Returns true if we successfully gain access to GC Adapter
     bool CheckDeviceAccess();

     /// Captures GC Adapter endpoint address
-    /// Returns true if the endpoind was set correctly
+    /// Returns true if the endpoint was set correctly
     bool GetGCEndpoint(libusb_device* device);

     /// For shutting down, clear all data, join all threads, release usb
@@ -129,7 +129,7 @@ void MotionInput::UpdateOrientation(u64 elapsed_time) {
         rad_gyro += ki * integral_error;
         rad_gyro += kd * derivative_error;
     } else {
-        // Give more weight to acelerometer values to compensate for the lack of gyro
+        // Give more weight to accelerometer values to compensate for the lack of gyro
         rad_gyro += 35.0f * kp * real_error;
         rad_gyro += 10.0f * ki * integral_error;
         rad_gyro += 10.0f * kd * derivative_error;
@@ -20,7 +20,7 @@ enum class MouseButton {
     Left,
     Wheel,
     Right,
-    Foward,
+    Forward,
     Backward,
     Undefined,
 };
@@ -28,14 +28,14 @@ private:
     mutable std::mutex mutex;
 };

-/// A motion device factory that creates motion devices from JC Adapter
+/// A motion device factory that creates motion devices from a UDP client
 UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_)
     : client(std::move(client_)) {}

 /**
  * Creates motion device
  * @param params contains parameters for creating the device:
- *     - "port": the nth jcpad on the adapter
+ *     - "port": the UDP port number
  */
 std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) {
     auto ip = params.Get("ip", "127.0.0.1");
@@ -90,14 +90,14 @@ private:
     mutable std::mutex mutex;
 };

-/// A motion device factory that creates motion devices from JC Adapter
+/// A motion device factory that creates motion devices from a UDP client
 UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_)
     : client(std::move(client_)) {}

 /**
  * Creates motion device
  * @param params contains parameters for creating the device:
- *     - "port": the nth jcpad on the adapter
+ *     - "port": the UDP port number
  */
 std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) {
     auto ip = params.Get("ip", "127.0.0.1");
@@ -207,7 +207,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
 }

 /** This test checks for fiber thread exchange configuration and validates that fibers are
- * that a fiber has been succesfully transfered from one thread to another and that the TLS
+ * that a fiber has been successfully transferred from one thread to another and that the TLS
  * region of the thread is kept while changing fibers.
  */
 TEST_CASE("Fibers::InterExchange", "[common]") {
@@ -299,7 +299,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
 }

 /** This test checks for one two threads racing for starting the same fiber.
- * It checks execution occured in an ordered manner and by no time there were
+ * It checks execution occurred in an ordered manner and by no time there were
  * two contexts at the same time.
  */
 TEST_CASE("Fibers::StartRace", "[common]") {
@@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {

 void Vic::Execute() {
     if (output_surface_luma_address == 0) {
-        LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}",
+        LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
                   vic_state.output_surface.luma_offset);
         return;
     }
@@ -491,7 +491,7 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
 }

 void VKDevice::ReportLoss() const {
-    LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
+    LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");

     // Wait for the log to flush and for Nsight Aftermath to dump the results
     std::this_thread::sleep_for(std::chrono::seconds{15});
@@ -19,7 +19,7 @@ QtErrorDisplay::~QtErrorDisplay() = default;
 void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const {
     callback = std::move(finished);
     emit MainWindowDisplayError(
-        tr("An error has occured.\nPlease try again or contact the developer of the "
+        tr("An error has occurred.\nPlease try again or contact the developer of the "
            "software.\n\nError Code: %1-%2 (0x%3)")
             .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
             .arg(error.description, 4, 10, QChar::fromLatin1('0'))

@@ -32,7 +32,7 @@ void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::secon
 
     const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count());
     emit MainWindowDisplayError(
-        tr("An error occured on %1 at %2.\nPlease try again or contact the "
+        tr("An error occurred on %1 at %2.\nPlease try again or contact the "
            "developer of the software.\n\nError Code: %3-%4 (0x%5)")
             .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy")))
             .arg(date_time.toString(QStringLiteral("h:mm:ss A")))

@@ -46,7 +46,7 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te
                                          std::function<void()> finished) const {
     callback = std::move(finished);
     emit MainWindowDisplayError(
-        tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5")
+        tr("An error has occurred.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5")
            .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
            .arg(error.description, 4, 10, QChar::fromLatin1('0'))
            .arg(error.raw, 8, 16, QChar::fromLatin1('0'))

@@ -72,7 +72,7 @@ void CompatDB::Submit() {
 void CompatDB::OnTestcaseSubmitted() {
     if (!testcase_watcher.result()) {
         QMessageBox::critical(this, tr("Communication error"),
-                              tr("An error occured while sending the Testcase"));
+                              tr("An error occurred while sending the Testcase"));
         button(NextButton)->setEnabled(true);
         button(NextButton)->setText(tr("Next"));
         button(CancelButton)->setVisible(true);

@@ -764,6 +764,8 @@ void Config::ReadCpuValues() {
             ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool();
         Settings::values.cpuopt_unsafe_reduce_fp_error =
             ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool();
+        Settings::values.cpuopt_unsafe_inaccurate_nan =
+            ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool();
     }
 
     qt_config->endGroup();

@@ -1327,6 +1329,8 @@ void Config::SaveCpuValues() {
                      Settings::values.cpuopt_unsafe_unfuse_fma, true);
         WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"),
                      Settings::values.cpuopt_unsafe_reduce_fp_error, true);
+        WriteSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
+                     Settings::values.cpuopt_unsafe_inaccurate_nan, true);
     }
 
     qt_config->endGroup();

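Note (editorial): the two config hunks above only persist the new `cpuopt_unsafe_inaccurate_nan` toggle. A minimal sketch, assuming yuzu's usual dynarmic plumbing, of how such a boolean is folded into the JIT's optimization mask when the per-core config is built; `config` stands in for a `Dynarmic::A64::UserConfig`, and everything outside the `if` is an assumption rather than part of this diff:

```cpp
// Hedged sketch: translate the saved setting into dynarmic's unsafe flag.
// `config` is assumed to be the per-core Dynarmic::A64::UserConfig being
// assembled; only the setting and flag names come from this commit.
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
    config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
```
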
@@ -36,6 +36,8 @@ void ConfigureCpu::SetConfiguration() {
     ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma);
     ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
     ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error);
+    ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
+    ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan);
 }
 
 void ConfigureCpu::AccuracyUpdated(int index) {

@@ -61,6 +63,7 @@ void ConfigureCpu::ApplyConfiguration() {
         static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex());
     Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked();
     Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked();
+    Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked();
 }
 
 void ConfigureCpu::changeEvent(QEvent* event) {

@@ -109,6 +109,18 @@
         </property>
        </widget>
       </item>
+      <item>
+       <widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan">
+        <property name="text">
+         <string>Inaccurate NaN handling</string>
+        </property>
+        <property name="toolTip">
+         <string>
+          <div>This option improves speed by removing NaN checking. Please note this also reduces accuracy of certain floating-point instructions.</div>
+         </string>
+        </property>
+       </widget>
+      </item>
      </layout>
     </widget>
    </item>

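Editorial note: the tooltip added above is the only in-tree explanation of the trade-off. A small self-contained illustration (an exposition aid, not code from this commit) of the class of operation affected: IEEE-754 requires 0 * inf to produce a NaN, and an accurate JIT must also canonicalize that NaN the way the guest CPU would, which is the checking this option removes.

```cpp
#include <cmath>
#include <cstdio>

int main() {
    // 0 * inf is an IEEE-754 "invalid" operation and must yield a NaN.
    // Under accurate emulation the JIT fixes up the NaN's sign/payload to
    // match the guest CPU; with inaccurate NaN handling, whatever NaN the
    // host instruction produced is kept, so dependent results may differ.
    const float r = 0.0f * INFINITY;
    std::printf("0 * inf -> %f (isnan=%d)\n", r, static_cast<int>(std::isnan(r)));
    return 0;
}
```
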
@@ -142,7 +142,7 @@ constexpr int default_mouse_timeout = 2500;
 /**
  * "Callouts" are one-time instructional messages shown to the user. In the config settings, there
  * is a bitfield "callout_flags" options, used to track if a message has already been shown to the
- * user. This is 32-bits - if we have more than 32 callouts, we should retire and recyle old ones.
+ * user. This is 32-bits - if we have more than 32 callouts, we should retire and recycle old ones.
  */
 enum class CalloutFlag : uint32_t {
     Telemetry = 0x1,

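Editorial note: the comment fixed above is the whole description of the mechanism, so a compact sketch may help. The helper names below are hypothetical, not from this file; only `CalloutFlag::Telemetry` and the 32-bit bitfield idea come from the source.

```cpp
#include <cstdint>
#include <cstdio>

enum class CalloutFlag : uint32_t {
    Telemetry = 0x1,
};

// Each callout owns one bit of a 32-bit word, so at most 32 callouts can
// be tracked and each one-time message is shown at most once.
static uint32_t callout_flags = 0;

static bool CalloutAlreadyShown(CalloutFlag flag) {
    return (callout_flags & static_cast<uint32_t>(flag)) != 0;
}

static void MarkCalloutShown(CalloutFlag flag) {
    callout_flags |= static_cast<uint32_t>(flag);
}

int main() {
    if (!CalloutAlreadyShown(CalloutFlag::Telemetry)) {
        std::puts("show the telemetry callout");
        MarkCalloutShown(CalloutFlag::Telemetry);
    }
    return 0;
}
```
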
@@ -202,7 +202,7 @@ int main(int argc, char** argv) {
         const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
         const u16 error_id = static_cast<u16>(load_result) - loader_id;
         LOG_CRITICAL(Frontend,
-                     "While attempting to load the ROM requested, an error occured. Please "
+                     "While attempting to load the ROM requested, an error occurred. Please "
                      "refer to the yuzu wiki for more information or the yuzu discord for "
                      "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}",
                      loader_id, error_id, static_cast<Loader::ResultStatus>(error_id));

@@ -242,7 +242,7 @@ int main(int argc, char** argv) {
         const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
         const u16 error_id = static_cast<u16>(load_result) - loader_id;
         LOG_CRITICAL(Frontend,
-                     "While attempting to load the ROM requested, an error occured. Please "
+                     "While attempting to load the ROM requested, an error occurred. Please "
                      "refer to the yuzu wiki for more information or the yuzu discord for "
                      "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}",
                      loader_id, error_id, static_cast<Loader::ResultStatus>(error_id));