early-access version 3981
This commit is contained in:
parent
a6746533fb
commit
e9dc1e4faf
11 changed files with 62 additions and 61 deletions
|
@ -1,7 +1,7 @@
|
|||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 3980.
|
||||
This is the source code for early-access 3981.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
|
|
@ -559,12 +559,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||
const IR::Value& offset, const IR::Value& lod_clamp) {
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
ScopedRegister dpdx, dpdy, coords;
|
||||
const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
|
||||
const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
|
||||
if (multi_component) {
|
||||
// Allocate this early to avoid aliasing other registers
|
||||
dpdx = ScopedRegister{ctx.reg_alloc};
|
||||
dpdy = ScopedRegister{ctx.reg_alloc};
|
||||
if (info.num_derivates >= 3) {
|
||||
if (info.num_derivatives >= 3) {
|
||||
coords = ScopedRegister{ctx.reg_alloc};
|
||||
}
|
||||
}
|
||||
|
@ -584,7 +584,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||
dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
|
||||
dpdy.reg, derivatives_vec);
|
||||
Register final_coord;
|
||||
if (info.num_derivates >= 3) {
|
||||
if (info.num_derivatives >= 3) {
|
||||
ctx.Add("MOV.F {}.z,{}.x;"
|
||||
"MOV.F {}.z,{}.y;",
|
||||
dpdx.reg, coord_vec, dpdy.reg, coord_vec);
|
||||
|
|
|
@ -548,15 +548,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||
if (sparse_inst) {
|
||||
throw NotImplementedException("EmitImageGradient Sparse");
|
||||
}
|
||||
if (!offset.IsEmpty() && info.num_derivates <= 2) {
|
||||
if (!offset.IsEmpty() && info.num_derivatives <= 2) {
|
||||
throw NotImplementedException("EmitImageGradient offset");
|
||||
}
|
||||
const auto texture{Texture(ctx, info, index)};
|
||||
const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
|
||||
const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
|
||||
const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
|
||||
const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
|
||||
if (multi_component) {
|
||||
if (info.num_derivates >= 3) {
|
||||
if (info.num_derivatives >= 3) {
|
||||
const auto offset_vec{ctx.var_alloc.Consume(offset)};
|
||||
ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
|
||||
coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
|
||||
|
|
|
@ -67,22 +67,22 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
|
||||
Id offset, Id lod_clamp) {
|
||||
if (!Sirit::ValidId(derivates)) {
|
||||
throw LogicError("Derivates must be present");
|
||||
explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
|
||||
u32 num_derivatives, Id offset, Id lod_clamp) {
|
||||
if (!Sirit::ValidId(derivatives)) {
|
||||
throw LogicError("Derivatives must be present");
|
||||
}
|
||||
boost::container::static_vector<Id, 3> deriv_x_accum;
|
||||
boost::container::static_vector<Id, 3> deriv_y_accum;
|
||||
for (u32 i = 0; i < num_derivates; ++i) {
|
||||
deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
|
||||
deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
|
||||
for (u32 i = 0; i < num_derivatives; ++i) {
|
||||
deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2));
|
||||
deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1));
|
||||
}
|
||||
const Id derivates_X{ctx.OpCompositeConstruct(
|
||||
ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
|
||||
const Id derivates_Y{ctx.OpCompositeConstruct(
|
||||
ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
|
||||
Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
|
||||
const Id derivatives_X{ctx.OpCompositeConstruct(
|
||||
ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
|
||||
const Id derivatives_Y{ctx.OpCompositeConstruct(
|
||||
ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
|
||||
Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
|
||||
if (Sirit::ValidId(offset)) {
|
||||
Add(spv::ImageOperandsMask::Offset, offset);
|
||||
}
|
||||
|
@ -91,26 +91,26 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
|
||||
explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
|
||||
Id offset, Id lod_clamp) {
|
||||
if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
|
||||
throw LogicError("Derivates must be present");
|
||||
if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
|
||||
throw LogicError("Derivatives must be present");
|
||||
}
|
||||
boost::container::static_vector<Id, 3> deriv_1_accum{
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 0),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 2),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 0),
|
||||
};
|
||||
boost::container::static_vector<Id, 3> deriv_2_accum{
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 1),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 3),
|
||||
ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 1),
|
||||
};
|
||||
const Id derivates_id1{ctx.OpCompositeConstruct(
|
||||
const Id derivatives_id1{ctx.OpCompositeConstruct(
|
||||
ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
|
||||
const Id derivates_id2{ctx.OpCompositeConstruct(
|
||||
const Id derivatives_id2{ctx.OpCompositeConstruct(
|
||||
ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
|
||||
Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
|
||||
Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
|
||||
if (Sirit::ValidId(offset)) {
|
||||
Add(spv::ImageOperandsMask::Offset, offset);
|
||||
}
|
||||
|
@ -556,12 +556,12 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
|||
}
|
||||
|
||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id derivates, Id offset, Id lod_clamp) {
|
||||
Id derivatives, Id offset, Id lod_clamp) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const auto operands =
|
||||
info.num_derivates == 3
|
||||
? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
|
||||
: ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
|
||||
info.num_derivatives == 3
|
||||
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp)
|
||||
: ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset,
|
||||
lod_clamp);
|
||||
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
|
||||
|
|
|
@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
|
|||
const IR::Value& skip_mips);
|
||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id derivates, Id offset, Id lod_clamp);
|
||||
Id derivatives, Id offset, Id lod_clamp);
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
|
||||
Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
|
||||
|
|
|
@ -1864,11 +1864,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture
|
|||
return Inst(op, Flags{info}, handle, coords);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
|
||||
Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives,
|
||||
const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
|
||||
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
|
||||
: Opcode::BindlessImageGradient};
|
||||
return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
|
||||
return Inst(op, Flags{info}, handle, coords, derivatives, offset, lod_clamp);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
|
||||
|
|
|
@ -335,7 +335,7 @@ public:
|
|||
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||
const U32& lod, const U32& multisampling, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
||||
const Value& derivates, const Value& offset,
|
||||
const Value& derivatives, const Value& offset,
|
||||
const F32& lod_clamp, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
|
||||
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
||||
|
|
|
@ -40,7 +40,7 @@ union TextureInstInfo {
|
|||
BitField<21, 1, u32> has_lod_clamp;
|
||||
BitField<22, 1, u32> relaxed_precision;
|
||||
BitField<23, 2, u32> gather_component;
|
||||
BitField<25, 2, u32> num_derivates;
|
||||
BitField<25, 2, u32> num_derivatives;
|
||||
BitField<27, 3, ImageFormat> image_format;
|
||||
BitField<30, 1, u32> ndv_is_active;
|
||||
};
|
||||
|
|
|
@ -59,7 +59,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
|
|||
BitField<51, 3, IR::Pred> sparse_pred;
|
||||
BitField<0, 8, IR::Reg> dest_reg;
|
||||
BitField<8, 8, IR::Reg> coord_reg;
|
||||
BitField<20, 8, IR::Reg> derivate_reg;
|
||||
BitField<20, 8, IR::Reg> derivative_reg;
|
||||
BitField<28, 3, TextureType> type;
|
||||
BitField<31, 4, u64> mask;
|
||||
BitField<36, 13, u64> cbuf_offset;
|
||||
|
@ -71,7 +71,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
|
|||
}
|
||||
|
||||
IR::Value coords;
|
||||
u32 num_derivates{};
|
||||
u32 num_derivatives{};
|
||||
IR::Reg base_reg{txd.coord_reg};
|
||||
IR::Reg last_reg;
|
||||
IR::Value handle;
|
||||
|
@ -90,42 +90,42 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
|
|||
switch (txd.type) {
|
||||
case TextureType::_1D: {
|
||||
coords = v.F(base_reg);
|
||||
num_derivates = 1;
|
||||
num_derivatives = 1;
|
||||
last_reg = base_reg + 1;
|
||||
break;
|
||||
}
|
||||
case TextureType::ARRAY_1D: {
|
||||
last_reg = base_reg + 1;
|
||||
coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
|
||||
num_derivates = 1;
|
||||
num_derivatives = 1;
|
||||
break;
|
||||
}
|
||||
case TextureType::_2D: {
|
||||
last_reg = base_reg + 2;
|
||||
coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
|
||||
num_derivates = 2;
|
||||
num_derivatives = 2;
|
||||
break;
|
||||
}
|
||||
case TextureType::ARRAY_2D: {
|
||||
last_reg = base_reg + 2;
|
||||
coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
|
||||
num_derivates = 2;
|
||||
num_derivatives = 2;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw NotImplementedException("Invalid texture type");
|
||||
}
|
||||
|
||||
const IR::Reg derivate_reg{txd.derivate_reg};
|
||||
IR::Value derivates;
|
||||
switch (num_derivates) {
|
||||
const IR::Reg derivative_reg{txd.derivative_reg};
|
||||
IR::Value derivatives;
|
||||
switch (num_derivatives) {
|
||||
case 1: {
|
||||
derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
|
||||
derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1));
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
|
||||
v.F(derivate_reg + 2), v.F(derivate_reg + 3));
|
||||
derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1),
|
||||
v.F(derivative_reg + 2), v.F(derivative_reg + 3));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -150,9 +150,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
|
|||
|
||||
IR::TextureInstInfo info{};
|
||||
info.type.Assign(GetType(txd.type));
|
||||
info.num_derivates.Assign(num_derivates);
|
||||
info.num_derivatives.Assign(num_derivatives);
|
||||
info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
|
||||
const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
|
||||
const IR::Value sample{
|
||||
v.ir.ImageGradient(handle, coords, derivatives, offset, lod_clamp, info)};
|
||||
|
||||
IR::Reg dest_reg{txd.dest_reg};
|
||||
for (size_t element = 0; element < 4; ++element) {
|
||||
|
|
|
@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
bool FoldDerivateYFromCorrection(IR::Inst& inst) {
|
||||
bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
|
||||
const IR::Value lhs_value{inst.Arg(0)};
|
||||
const IR::Value rhs_value{inst.Arg(1)};
|
||||
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
||||
|
@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
|
|||
if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
if (FoldDerivateYFromCorrection(inst)) {
|
||||
if (FoldDerivativeYFromCorrection(inst)) {
|
||||
return;
|
||||
}
|
||||
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
||||
|
@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
|
||||
bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
|
||||
if (coord.IsImmediate()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
|
|||
IR::Inst* const inst2 = coords.InstRecursive();
|
||||
std::array<std::array<IR::Value, 3>, 3> results_matrix;
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
|
||||
if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
|
|||
IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
|
||||
results_matrix[1][1], results_matrix[1][2]);
|
||||
IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
|
||||
info.num_derivates.Assign(3);
|
||||
info.num_derivatives.Assign(3);
|
||||
IR::Value new_gradient_instruction =
|
||||
ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
|
||||
IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
|
||||
|
|
|
@ -72,7 +72,7 @@ void Fermi2D::Blit() {
|
|||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
constexpr s64 null_derivate = 1ULL << 32;
|
||||
constexpr s64 null_derivative = 1ULL << 32;
|
||||
Surface src = regs.src;
|
||||
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
|
||||
const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
|
||||
|
@ -89,7 +89,7 @@ void Fermi2D::Blit() {
|
|||
.operation = regs.operation,
|
||||
.filter = args.sample_mode.filter,
|
||||
.must_accelerate =
|
||||
args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu,
|
||||
args.du_dx != null_derivative || args.dv_dy != null_derivative || delegate_to_gpu,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
|
|
Loading…
Add table
Reference in a new issue