diff --git a/README.md b/README.md index eb09f1d15..03e5221d5 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1871. +This is the source code for early-access 1873. ## Legal Notice diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 10bb01d99..20458d2ad 100755 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -29,6 +29,49 @@ IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32 } } +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop) { switch (bop) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f584060b3..214d0af3c 100755 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -11,6 +11,10 @@ namespace Shader::Maxwell { [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed); +[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, + bool is_signed); + [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 34fa7345c..8ce1aee04 100755 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -9,49 +9,6 @@ namespace Shader::Maxwell { namespace { -IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, - CompareOp compare_op, bool is_signed) { - const IR::U32 zero{ir.Imm32(0)}; - const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; - const IR::U1 z_flag{ir.GetZFlag()}; - const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; - const IR::U1 flip_logic{is_signed ? ir.Imm1(false) - : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), - ir.ILessThan(operand_2, zero, true))}; - switch (compare_op) { - case CompareOp::False: - return ir.Imm1(false); - case CompareOp::LessThan: - return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - case CompareOp::Equal: - return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); - case CompareOp::LessThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::GreaterThan: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), - ir.IGreaterThan(intermediate, zero, true))}; - const IR::U1 not_z{ir.LogicalNot(z_flag)}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); - } - case CompareOp::NotEqual: - return ir.LogicalOr(ir.INotEqual(intermediate, zero), - ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); - case CompareOp::GreaterThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), - ir.IGreaterThanEqual(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::True: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed, bool x) { return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 7743701d0..bee10e5b9 100755 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -9,6 +9,12 @@ namespace Shader::Maxwell { namespace { +IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { union { u64 raw; @@ -17,15 +23,18 @@ void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<8, 8, IR::Reg> src_reg_a; BitField<39, 3, IR::Pred> bop_pred; BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> x; BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const isetp{insn}; + const bool is_signed{isetp.is_signed != 0}; + const bool x{isetp.x != 0}; const BooleanOp bop{isetp.bop}; const CompareOp compare_op{isetp.compare_op}; const IR::U32 op_a{v.X(isetp.src_reg_a)}; - const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, compare_op, isetp.is_signed != 0)}; + const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 798b327d2..79c0bea0e 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -186,11 +186,16 @@ public: /// Pop asynchronous downloads void PopAsyncFlushes(); - [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); + bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); + + bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + /// Return true when a region is registered on the cache + [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); + /// Return true when a CPU region is modified from the CPU [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); @@ -516,8 +521,8 @@ bool BufferCache
::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am if (!cpu_src_address || !cpu_dest_address) { return false; } - const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); - const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); + const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount); + const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount); if (!source_dirty && !dest_dirty) { return false; } @@ -562,7 +567,7 @@ bool BufferCache
::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
}
runtime.CopyBuffer(dest_buffer, src_buffer, copies);
- if (source_dirty) {
+ if (IsRegionGpuModified(*cpu_src_address, amount)) {
dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount);
}
std::vector ::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
return true;
}
+template ::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
+ const std::optional ::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
@@ -876,6 +912,27 @@ bool BufferCache ::IsRegionGpuModified(VAddr addr, size_t size) {
return false;
}
+template ::IsRegionRegistered(VAddr addr, size_t size) {
+ const VAddr end_addr = addr + size;
+ const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE);
+ for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+ const BufferId buffer_id = page_table[page];
+ if (!buffer_id) {
+ ++page;
+ continue;
+ }
+ Buffer& buffer = slot_buffers[buffer_id];
+ const VAddr buf_start_addr = buffer.CpuAddr();
+ const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
+ if (buf_start_addr < end_addr && addr < buf_end_addr) {
+ return true;
+ }
+ page = Common::DivCeil(end_addr, PAGE_SIZE);
+ }
+ return false;
+}
+
template ::IsRegionCpuModified(VAddr addr, size_t size) {
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 9c657e32d..7a06531b4 100755
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -87,9 +87,11 @@ void MaxwellDMA::CopyPitchToPitch() {
// TODO: allow multisized components.
if (is_buffer_clear) {
ASSERT(regs.remap_const.component_size_minus_one == 3);
+ accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
std::vector