mirror of
https://git.suyu.dev/suyu/suyu.git
synced 2025-01-10 17:51:01 +01:00
Y2R: Rework conversion process, enabling support for all formats
This commit is contained in:
parent
29d2b70ea4
commit
3e6663da43
5 changed files with 701 additions and 169 deletions
|
@ -96,6 +96,7 @@ set(SRCS
|
||||||
hw/gpu.cpp
|
hw/gpu.cpp
|
||||||
hw/hw.cpp
|
hw/hw.cpp
|
||||||
hw/lcd.cpp
|
hw/lcd.cpp
|
||||||
|
hw/y2r.cpp
|
||||||
loader/3dsx.cpp
|
loader/3dsx.cpp
|
||||||
loader/elf.cpp
|
loader/elf.cpp
|
||||||
loader/loader.cpp
|
loader/loader.cpp
|
||||||
|
@ -209,6 +210,7 @@ set(HEADERS
|
||||||
hw/gpu.h
|
hw/gpu.h
|
||||||
hw/hw.h
|
hw/hw.h
|
||||||
hw/lcd.h
|
hw/lcd.h
|
||||||
|
hw/y2r.h
|
||||||
loader/3dsx.h
|
loader/3dsx.h
|
||||||
loader/elf.h
|
loader/elf.h
|
||||||
loader/loader.h
|
loader/loader.h
|
||||||
|
|
|
@ -9,8 +9,8 @@
|
||||||
#include "core/hle/hle.h"
|
#include "core/hle/hle.h"
|
||||||
#include "core/hle/kernel/event.h"
|
#include "core/hle/kernel/event.h"
|
||||||
#include "core/hle/service/y2r_u.h"
|
#include "core/hle/service/y2r_u.h"
|
||||||
|
#include "core/hw/y2r.h"
|
||||||
#include "core/mem_map.h"
|
#include "core/mem_map.h"
|
||||||
#include "core/memory.h"
|
|
||||||
|
|
||||||
#include "video_core/utils.h"
|
#include "video_core/utils.h"
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
@ -20,47 +20,6 @@
|
||||||
|
|
||||||
namespace Y2R_U {
|
namespace Y2R_U {
|
||||||
|
|
||||||
enum class InputFormat : u8 {
|
|
||||||
/// 8-bit input, with YUV components in separate planes and using 4:2:2 subsampling.
|
|
||||||
YUV422_Indiv8 = 0,
|
|
||||||
/// 8-bit input, with YUV components in separate planes and using 4:2:0 subsampling.
|
|
||||||
YUV420_Indiv8 = 1,
|
|
||||||
|
|
||||||
YUV422_INDIV_16 = 2,
|
|
||||||
YUV420_INDIV_16 = 3,
|
|
||||||
YUV422_BATCH = 4,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class OutputFormat : u8 {
|
|
||||||
Rgb32 = 0,
|
|
||||||
Rgb24 = 1,
|
|
||||||
Rgb16_555 = 2,
|
|
||||||
Rgb16_565 = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class Rotation : u8 {
|
|
||||||
None = 0,
|
|
||||||
Clockwise_90 = 1,
|
|
||||||
Clockwise_180 = 2,
|
|
||||||
Clockwise_270 = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class BlockAlignment : u8 {
|
|
||||||
/// Image is output in linear format suitable for use as a framebuffer.
|
|
||||||
Linear = 0,
|
|
||||||
/// Image is output in tiled PICA format, suitable for use as a texture.
|
|
||||||
Block8x8 = 1,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class StandardCoefficient : u8 {
|
|
||||||
ITU_Rec601 = 0,
|
|
||||||
ITU_Rec709 = 1,
|
|
||||||
ITU_Rec601_Scaling = 2,
|
|
||||||
ITU_Rec709_Scaling = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
static Kernel::SharedPtr<Kernel::Event> completion_event;
|
|
||||||
|
|
||||||
struct ConversionParameters {
|
struct ConversionParameters {
|
||||||
InputFormat input_format;
|
InputFormat input_format;
|
||||||
OutputFormat output_format;
|
OutputFormat output_format;
|
||||||
|
@ -74,28 +33,60 @@ struct ConversionParameters {
|
||||||
};
|
};
|
||||||
static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
|
static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
|
||||||
|
|
||||||
struct ConversionBuffer {
|
static Kernel::SharedPtr<Kernel::Event> completion_event;
|
||||||
VAddr address;
|
static ConversionConfiguration conversion;
|
||||||
u32 image_size;
|
|
||||||
u16 transfer_unit;
|
static const CoefficientSet standard_coefficients[4] = {
|
||||||
u16 stride;
|
{{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
|
||||||
|
{{ 0x100, 0x193, 0x77, 0x2F, 0x1DB, -0x1933, 0xA7C, -0x1D51 }}, // ITU_Rec709
|
||||||
|
{{ 0x12A, 0x198, 0xD0, 0x64, 0x204, -0x1BDE, 0x10F2, -0x229B }}, // ITU_Rec601_Scaling
|
||||||
|
{{ 0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04, 0x99C, -0x2421 }}, // ITU_Rec709_Scaling
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ConversionData {
|
ResultCode ConversionConfiguration::SetInputLineWidth(u16 width) {
|
||||||
ConversionParameters params;
|
if (width == 0 || width > 1024 || width % 8 != 0) {
|
||||||
/// Input parameters for the Y (luma) plane
|
return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM,
|
||||||
ConversionBuffer src_Y;
|
ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
|
||||||
/// Output parameters for the conversion results
|
}
|
||||||
ConversionBuffer dst;
|
|
||||||
};
|
|
||||||
|
|
||||||
static ConversionData conversion;
|
// Note: The hardware uses the register value 0 to represent a width of 1024, so for a width of
|
||||||
|
// 1024 the `camera` module would set the value 0 here, but we don't need to emulate this
|
||||||
|
// internal detail.
|
||||||
|
this->input_line_width = width;
|
||||||
|
return RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
|
||||||
|
if (lines == 0 || lines > 1024) {
|
||||||
|
return ResultCode(ErrorDescription::OutOfRange, ErrorModule::CAM,
|
||||||
|
ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053FD
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: In what appears to be a bug, the `camera` module does not set the hardware register at
|
||||||
|
// all if `lines` is 1024, so the conversion uses the last value that was set. The intention
|
||||||
|
// was probably to set it to 0 like in SetInputLineWidth.
|
||||||
|
if (lines != 1024) {
|
||||||
|
this->input_lines = lines;
|
||||||
|
}
|
||||||
|
return RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
|
||||||
|
size_t index = static_cast<size_t>(standard_coefficient);
|
||||||
|
if (index >= 4) {
|
||||||
|
return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
|
||||||
|
ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(coefficients.data(), standard_coefficients[index].data(), sizeof(coefficients));
|
||||||
|
return RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
static void SetInputFormat(Service::Interface* self) {
|
static void SetInputFormat(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
conversion.params.input_format = static_cast<InputFormat>(cmd_buff[1]);
|
conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
|
||||||
LOG_DEBUG(Service_Y2R, "called input_format=%u", conversion.params.input_format);
|
LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
}
|
}
|
||||||
|
@ -103,8 +94,8 @@ static void SetInputFormat(Service::Interface* self) {
|
||||||
static void SetOutputFormat(Service::Interface* self) {
|
static void SetOutputFormat(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
conversion.params.output_format = static_cast<OutputFormat>(cmd_buff[1]);
|
conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
|
||||||
LOG_DEBUG(Service_Y2R, "called output_format=%u", conversion.params.output_format);
|
LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
}
|
}
|
||||||
|
@ -112,8 +103,8 @@ static void SetOutputFormat(Service::Interface* self) {
|
||||||
static void SetRotation(Service::Interface* self) {
|
static void SetRotation(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
conversion.params.rotation = static_cast<Rotation>(cmd_buff[1]);
|
conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
|
||||||
LOG_DEBUG(Service_Y2R, "called rotation=%u", conversion.params.rotation);
|
LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
}
|
}
|
||||||
|
@ -121,12 +112,20 @@ static void SetRotation(Service::Interface* self) {
|
||||||
static void SetBlockAlignment(Service::Interface* self) {
|
static void SetBlockAlignment(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
conversion.params.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
|
conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
|
||||||
LOG_DEBUG(Service_Y2R, "called alignment=%u", conversion.params.block_alignment);
|
LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void SetTransferEndInterrupt(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
cmd_buff[0] = 0x000D0040;
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
LOG_DEBUG(Service_Y2R, "(STUBBED) called");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Y2R_U::GetTransferEndEvent service function
|
* Y2R_U::GetTransferEndEvent service function
|
||||||
* Outputs:
|
* Outputs:
|
||||||
|
@ -147,11 +146,56 @@ static void SetSendingY(Service::Interface* self) {
|
||||||
conversion.src_Y.address = cmd_buff[1];
|
conversion.src_Y.address = cmd_buff[1];
|
||||||
conversion.src_Y.image_size = cmd_buff[2];
|
conversion.src_Y.image_size = cmd_buff[2];
|
||||||
conversion.src_Y.transfer_unit = cmd_buff[3];
|
conversion.src_Y.transfer_unit = cmd_buff[3];
|
||||||
conversion.src_Y.stride = cmd_buff[4];
|
conversion.src_Y.gap = cmd_buff[4];
|
||||||
u32 src_process_handle = cmd_buff[6];
|
u32 src_process_handle = cmd_buff[6];
|
||||||
LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
|
LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
|
||||||
"src_process_handle=0x%08X", conversion.src_Y.image_size,
|
"src_process_handle=0x%08X", conversion.src_Y.image_size,
|
||||||
conversion.src_Y.transfer_unit, conversion.src_Y.stride, src_process_handle);
|
conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
|
||||||
|
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetSendingU(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
conversion.src_U.address = cmd_buff[1];
|
||||||
|
conversion.src_U.image_size = cmd_buff[2];
|
||||||
|
conversion.src_U.transfer_unit = cmd_buff[3];
|
||||||
|
conversion.src_U.gap = cmd_buff[4];
|
||||||
|
u32 src_process_handle = cmd_buff[6];
|
||||||
|
LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
|
||||||
|
"src_process_handle=0x%08X", conversion.src_U.image_size,
|
||||||
|
conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
|
||||||
|
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetSendingV(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
conversion.src_V.address = cmd_buff[1];
|
||||||
|
conversion.src_V.image_size = cmd_buff[2];
|
||||||
|
conversion.src_V.transfer_unit = cmd_buff[3];
|
||||||
|
conversion.src_V.gap = cmd_buff[4];
|
||||||
|
u32 src_process_handle = cmd_buff[6];
|
||||||
|
LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
|
||||||
|
"src_process_handle=0x%08X", conversion.src_V.image_size,
|
||||||
|
conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
|
||||||
|
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetSendingYUYV(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
conversion.src_YUYV.address = cmd_buff[1];
|
||||||
|
conversion.src_YUYV.image_size = cmd_buff[2];
|
||||||
|
conversion.src_YUYV.transfer_unit = cmd_buff[3];
|
||||||
|
conversion.src_YUYV.gap = cmd_buff[4];
|
||||||
|
u32 src_process_handle = cmd_buff[6];
|
||||||
|
LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
|
||||||
|
"src_process_handle=0x%08X", conversion.src_YUYV.image_size,
|
||||||
|
conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
}
|
}
|
||||||
|
@ -162,11 +206,11 @@ static void SetReceiving(Service::Interface* self) {
|
||||||
conversion.dst.address = cmd_buff[1];
|
conversion.dst.address = cmd_buff[1];
|
||||||
conversion.dst.image_size = cmd_buff[2];
|
conversion.dst.image_size = cmd_buff[2];
|
||||||
conversion.dst.transfer_unit = cmd_buff[3];
|
conversion.dst.transfer_unit = cmd_buff[3];
|
||||||
conversion.dst.stride = cmd_buff[4];
|
conversion.dst.gap = cmd_buff[4];
|
||||||
u32 dst_process_handle = cmd_buff[6];
|
u32 dst_process_handle = cmd_buff[6];
|
||||||
LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
|
LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
|
||||||
"dst_process_handle=0x%08X", conversion.dst.image_size,
|
"dst_process_handle=0x%08X", conversion.dst.image_size,
|
||||||
conversion.dst.transfer_unit, conversion.dst.stride,
|
conversion.dst.transfer_unit, conversion.dst.gap,
|
||||||
dst_process_handle);
|
dst_process_handle);
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
@ -175,17 +219,42 @@ static void SetReceiving(Service::Interface* self) {
|
||||||
static void SetInputLineWidth(Service::Interface* self) {
|
static void SetInputLineWidth(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
conversion.params.input_line_width = cmd_buff[1];
|
LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
|
||||||
LOG_DEBUG(Service_Y2R, "input_line_width=%u", conversion.params.input_line_width);
|
cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetInputLines(Service::Interface* self) {
|
static void SetInputLines(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
conversion.params.input_lines = cmd_buff[1];
|
LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]);
|
||||||
LOG_DEBUG(Service_Y2R, "input_line_number=%u", conversion.params.input_lines);
|
cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetCoefficient(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
|
||||||
|
std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
|
||||||
|
LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
|
||||||
|
coefficients[0], coefficients[1], coefficients[2], coefficients[3],
|
||||||
|
coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
|
||||||
|
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetStandardCoefficient(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]);
|
||||||
|
|
||||||
|
cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetAlpha(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
conversion.alpha = cmd_buff[1];
|
||||||
|
LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
|
||||||
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
}
|
}
|
||||||
|
@ -193,89 +262,11 @@ static void SetInputLines(Service::Interface* self) {
|
||||||
static void StartConversion(Service::Interface* self) {
|
static void StartConversion(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
const ConversionParameters& params = conversion.params;
|
HW::Y2R::PerformConversion(conversion);
|
||||||
|
|
||||||
const u8* srcY_buffer = Memory::GetPointer(conversion.src_Y.address);
|
// dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
|
||||||
u8* dst_buffer = Memory::GetPointer(conversion.dst.address);
|
u32 total_output_size = conversion.input_lines *
|
||||||
|
(conversion.dst.transfer_unit + conversion.dst.gap);
|
||||||
// TODO: support color and other kinds of conversions
|
|
||||||
ASSERT(params.input_format == InputFormat::YUV422_Indiv8
|
|
||||||
|| params.input_format == InputFormat::YUV420_Indiv8);
|
|
||||||
ASSERT(params.output_format == OutputFormat::Rgb24);
|
|
||||||
ASSERT(params.rotation == Rotation::None);
|
|
||||||
const int bpp = 3;
|
|
||||||
|
|
||||||
switch (params.block_alignment) {
|
|
||||||
case BlockAlignment::Linear:
|
|
||||||
{
|
|
||||||
const size_t input_lines = params.input_lines;
|
|
||||||
const size_t input_line_width = params.input_line_width;
|
|
||||||
const size_t srcY_stride = conversion.src_Y.stride;
|
|
||||||
const size_t dst_stride = conversion.dst.stride;
|
|
||||||
|
|
||||||
size_t srcY_offset = 0;
|
|
||||||
size_t dst_offset = 0;
|
|
||||||
|
|
||||||
for (size_t line = 0; line < input_lines; ++line) {
|
|
||||||
for (size_t i = 0; i < input_line_width; ++i) {
|
|
||||||
u8 Y = srcY_buffer[srcY_offset];
|
|
||||||
dst_buffer[dst_offset + 0] = Y;
|
|
||||||
dst_buffer[dst_offset + 1] = Y;
|
|
||||||
dst_buffer[dst_offset + 2] = Y;
|
|
||||||
|
|
||||||
srcY_offset += 1;
|
|
||||||
dst_offset += bpp;
|
|
||||||
}
|
|
||||||
srcY_offset += srcY_stride;
|
|
||||||
dst_offset += dst_stride;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case BlockAlignment::Block8x8:
|
|
||||||
{
|
|
||||||
const size_t input_lines = params.input_lines;
|
|
||||||
const size_t input_line_width = params.input_line_width;
|
|
||||||
const size_t srcY_stride = conversion.src_Y.stride;
|
|
||||||
const size_t dst_transfer_unit = conversion.dst.transfer_unit;
|
|
||||||
const size_t dst_stride = conversion.dst.stride;
|
|
||||||
|
|
||||||
size_t srcY_offset = 0;
|
|
||||||
size_t dst_tile_line_offs = 0;
|
|
||||||
|
|
||||||
const size_t tile_size = 8 * 8 * bpp;
|
|
||||||
|
|
||||||
for (size_t line = 0; line < input_lines;) {
|
|
||||||
size_t max_line = line + 8;
|
|
||||||
|
|
||||||
for (; line < max_line; ++line) {
|
|
||||||
for (size_t x = 0; x < input_line_width; ++x) {
|
|
||||||
size_t tile_x = x / 8;
|
|
||||||
|
|
||||||
size_t dst_tile_offs = dst_tile_line_offs + tile_x * tile_size;
|
|
||||||
size_t tile_i = VideoCore::MortonInterleave((u32)x, (u32)line);
|
|
||||||
|
|
||||||
size_t dst_offset = dst_tile_offs + tile_i * bpp;
|
|
||||||
|
|
||||||
u8 Y = srcY_buffer[srcY_offset];
|
|
||||||
dst_buffer[dst_offset + 0] = Y;
|
|
||||||
dst_buffer[dst_offset + 1] = Y;
|
|
||||||
dst_buffer[dst_offset + 2] = Y;
|
|
||||||
|
|
||||||
srcY_offset += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
srcY_offset += srcY_stride;
|
|
||||||
}
|
|
||||||
|
|
||||||
dst_tile_line_offs += dst_transfer_unit + dst_stride;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// dst_image_size would seem to be perfect for this, but it doesn't include the stride :(
|
|
||||||
u32 total_output_size = params.input_lines *
|
|
||||||
(conversion.dst.transfer_unit + conversion.dst.stride);
|
|
||||||
VideoCore::g_renderer->hw_rasterizer->NotifyFlush(
|
VideoCore::g_renderer->hw_rasterizer->NotifyFlush(
|
||||||
Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
|
Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
|
||||||
|
|
||||||
|
@ -285,6 +276,14 @@ static void StartConversion(Service::Interface* self) {
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void StopConversion(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
cmd_buff[0] = 0x00270040;
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
LOG_DEBUG(Service_Y2R, "called");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Y2R_U::IsBusyConversion service function
|
* Y2R_U::IsBusyConversion service function
|
||||||
* Outputs:
|
* Outputs:
|
||||||
|
@ -306,15 +305,31 @@ static void SetConversionParams(Service::Interface* self) {
|
||||||
u32* cmd_buff = Kernel::GetCommandBuffer();
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
|
auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
|
||||||
conversion.params = *params;
|
|
||||||
|
|
||||||
cmd_buff[0] = 0x00290000; // TODO verify
|
|
||||||
cmd_buff[1] = RESULT_SUCCESS.raw;
|
|
||||||
LOG_DEBUG(Service_Y2R,
|
LOG_DEBUG(Service_Y2R,
|
||||||
"called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
|
"called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
|
||||||
"input_line_width=%hX input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX",
|
"input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
|
||||||
|
"reserved=%hhu alpha=%hX",
|
||||||
params->input_format, params->output_format, params->rotation, params->block_alignment,
|
params->input_format, params->output_format, params->rotation, params->block_alignment,
|
||||||
params->input_line_width, params->input_lines, params->standard_coefficient);
|
params->input_line_width, params->input_lines, params->standard_coefficient,
|
||||||
|
params->reserved, params->alpha);
|
||||||
|
|
||||||
|
ResultCode result = RESULT_SUCCESS;
|
||||||
|
|
||||||
|
conversion.input_format = params->input_format;
|
||||||
|
conversion.output_format = params->output_format;
|
||||||
|
conversion.rotation = params->rotation;
|
||||||
|
conversion.block_alignment = params->block_alignment;
|
||||||
|
result = conversion.SetInputLineWidth(params->input_line_width);
|
||||||
|
if (result.IsError()) goto cleanup;
|
||||||
|
result = conversion.SetInputLines(params->input_lines);
|
||||||
|
if (result.IsError()) goto cleanup;
|
||||||
|
result = conversion.SetStandardCoefficient(params->standard_coefficient);
|
||||||
|
if (result.IsError()) goto cleanup;
|
||||||
|
conversion.alpha = params->alpha;
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
cmd_buff[0] = 0x00290040; // TODO verify
|
||||||
|
cmd_buff[1] = result.raw;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void PingProcess(Service::Interface* self) {
|
static void PingProcess(Service::Interface* self) {
|
||||||
|
@ -325,28 +340,63 @@ static void PingProcess(Service::Interface* self) {
|
||||||
LOG_WARNING(Service_Y2R, "(STUBBED) called");
|
LOG_WARNING(Service_Y2R, "(STUBBED) called");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void DriverInitialize(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
conversion.input_format = InputFormat::YUV422_Indiv8;
|
||||||
|
conversion.output_format = OutputFormat::RGBA8;
|
||||||
|
conversion.rotation = Rotation::None;
|
||||||
|
conversion.block_alignment = BlockAlignment::Linear;
|
||||||
|
conversion.coefficients.fill(0);
|
||||||
|
conversion.SetInputLineWidth(1024);
|
||||||
|
conversion.SetInputLines(1024);
|
||||||
|
conversion.alpha = 0;
|
||||||
|
|
||||||
|
ConversionBuffer zero_buffer = {};
|
||||||
|
conversion.src_Y = zero_buffer;
|
||||||
|
conversion.src_U = zero_buffer;
|
||||||
|
conversion.src_V = zero_buffer;
|
||||||
|
conversion.dst = zero_buffer;
|
||||||
|
|
||||||
|
completion_event->Clear();
|
||||||
|
|
||||||
|
cmd_buff[0] = 0x002B0040;
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
LOG_DEBUG(Service_Y2R, "called");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void DriverFinalize(Service::Interface* self) {
|
||||||
|
u32* cmd_buff = Kernel::GetCommandBuffer();
|
||||||
|
|
||||||
|
cmd_buff[0] = 0x002C0040;
|
||||||
|
cmd_buff[1] = RESULT_SUCCESS.raw;
|
||||||
|
LOG_DEBUG(Service_Y2R, "called");
|
||||||
|
}
|
||||||
|
|
||||||
const Interface::FunctionInfo FunctionTable[] = {
|
const Interface::FunctionInfo FunctionTable[] = {
|
||||||
{0x00010040, SetInputFormat, "SetInputFormat"},
|
{0x00010040, SetInputFormat, "SetInputFormat"},
|
||||||
{0x00030040, SetOutputFormat, "SetOutputFormat"},
|
{0x00030040, SetOutputFormat, "SetOutputFormat"},
|
||||||
{0x00050040, SetRotation, "SetRotation"},
|
{0x00050040, SetRotation, "SetRotation"},
|
||||||
{0x00070040, SetBlockAlignment, "SetBlockAlignment"},
|
{0x00070040, SetBlockAlignment, "SetBlockAlignment"},
|
||||||
{0x000D0040, nullptr, "SetTransferEndInterrupt"},
|
{0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
|
||||||
{0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
|
{0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
|
||||||
{0x00100102, SetSendingY, "SetSendingY"},
|
{0x00100102, SetSendingY, "SetSendingY"},
|
||||||
{0x00110102, nullptr, "SetSendingU"},
|
{0x00110102, SetSendingU, "SetSendingU"},
|
||||||
{0x00120102, nullptr, "SetSendingV"},
|
{0x00120102, SetSendingV, "SetSendingV"},
|
||||||
|
{0x00130102, SetSendingYUYV, "SetSendingYUYV"},
|
||||||
{0x00180102, SetReceiving, "SetReceiving"},
|
{0x00180102, SetReceiving, "SetReceiving"},
|
||||||
{0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
|
{0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
|
||||||
{0x001C0040, SetInputLines, "SetInputLines"},
|
{0x001C0040, SetInputLines, "SetInputLines"},
|
||||||
{0x00200040, nullptr, "SetStandardCoefficient"},
|
{0x001E0100, SetCoefficient, "SetCoefficient"},
|
||||||
{0x00220040, nullptr, "SetAlpha"},
|
{0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
|
||||||
|
{0x00220040, SetAlpha, "SetAlpha"},
|
||||||
{0x00260000, StartConversion, "StartConversion"},
|
{0x00260000, StartConversion, "StartConversion"},
|
||||||
{0x00270000, nullptr, "StopConversion"},
|
{0x00270000, StopConversion, "StopConversion"},
|
||||||
{0x00280000, IsBusyConversion, "IsBusyConversion"},
|
{0x00280000, IsBusyConversion, "IsBusyConversion"},
|
||||||
{0x002901C0, SetConversionParams, "SetConversionParams"},
|
{0x002901C0, SetConversionParams, "SetConversionParams"},
|
||||||
{0x002A0000, PingProcess, "PingProcess"},
|
{0x002A0000, PingProcess, "PingProcess"},
|
||||||
{0x002B0000, nullptr, "DriverInitialize"},
|
{0x002B0000, DriverInitialize, "DriverInitialize"},
|
||||||
{0x002C0000, nullptr, "DriverFinalize"},
|
{0x002C0000, DriverFinalize, "DriverFinalize"},
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -4,6 +4,10 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
#include "core/hle/service/service.h"
|
#include "core/hle/service/service.h"
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -11,6 +15,98 @@
|
||||||
|
|
||||||
namespace Y2R_U {
|
namespace Y2R_U {
|
||||||
|
|
||||||
|
enum class InputFormat : u8 {
|
||||||
|
/// 8-bit input, with YUV components in separate planes and 4:2:2 subsampling.
|
||||||
|
YUV422_Indiv8 = 0,
|
||||||
|
/// 8-bit input, with YUV components in separate planes and 4:2:0 subsampling.
|
||||||
|
YUV420_Indiv8 = 1,
|
||||||
|
|
||||||
|
/// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:2 subsampling.
|
||||||
|
YUV422_Indiv16 = 2,
|
||||||
|
/// 16-bit input (only LSB used), with YUV components in separate planes and 4:2:0 subsampling.
|
||||||
|
YUV420_Indiv16 = 3,
|
||||||
|
|
||||||
|
/// 8-bit input, with a single interleaved stream in YUYV format and 4:2:2 subsampling.
|
||||||
|
YUYV422_Interleaved = 4,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class OutputFormat : u8 {
|
||||||
|
RGBA8 = 0,
|
||||||
|
RGB8 = 1,
|
||||||
|
RGB5A1 = 2,
|
||||||
|
RGB565 = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class Rotation : u8 {
|
||||||
|
None = 0,
|
||||||
|
Clockwise_90 = 1,
|
||||||
|
Clockwise_180 = 2,
|
||||||
|
Clockwise_270 = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class BlockAlignment : u8 {
|
||||||
|
/// Image is output in linear format suitable for use as a framebuffer.
|
||||||
|
Linear = 0,
|
||||||
|
/// Image is output in tiled PICA format, suitable for use as a texture.
|
||||||
|
Block8x8 = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class StandardCoefficient : u8 {
|
||||||
|
/// ITU Rec. BT.601 primaries, with PC ranges.
|
||||||
|
ITU_Rec601 = 0,
|
||||||
|
/// ITU Rec. BT.709 primaries, with PC ranges.
|
||||||
|
ITU_Rec709 = 1,
|
||||||
|
/// ITU Rec. BT.601 primaries, with TV ranges.
|
||||||
|
ITU_Rec601_Scaling = 2,
|
||||||
|
/// ITU Rec. BT.709 primaries, with TV ranges.
|
||||||
|
ITU_Rec709_Scaling = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A set of coefficients configuring the YUV to RGB conversion. Coefficients 0-4 are unsigned 2.8
|
||||||
|
* fixed point numbers representing entries on the conversion matrix, while coefficients 5-7 are
|
||||||
|
* signed 11.5 fixed point numbers added as offsets to the RGB result.
|
||||||
|
*
|
||||||
|
* The overall conversion process formula is:
|
||||||
|
* ```
|
||||||
|
* R = trunc((c_0 * Y + c_1 * V) + c_5 + 0.75)
|
||||||
|
* G = trunc((c_0 * Y - c_3 * U - c_2 * V) + c_6 + 0.75)
|
||||||
|
* B = trunc((c_0 * Y + c_4 * U ) + c_7 + 0.75)
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
using CoefficientSet = std::array<s16, 8>;
|
||||||
|
|
||||||
|
struct ConversionBuffer {
|
||||||
|
/// Current reading/writing address of this buffer.
|
||||||
|
VAddr address;
|
||||||
|
/// Remaining amount of bytes to be DMAed, does not include the inter-transfer gap.
|
||||||
|
u32 image_size;
|
||||||
|
/// Size of a single DMA transfer.
|
||||||
|
u16 transfer_unit;
|
||||||
|
/// Amount of bytes to be skipped between copying each `transfer_unit` bytes.
|
||||||
|
u16 gap;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ConversionConfiguration {
|
||||||
|
InputFormat input_format;
|
||||||
|
OutputFormat output_format;
|
||||||
|
Rotation rotation;
|
||||||
|
BlockAlignment block_alignment;
|
||||||
|
u16 input_line_width;
|
||||||
|
u16 input_lines;
|
||||||
|
CoefficientSet coefficients;
|
||||||
|
u16 alpha;
|
||||||
|
|
||||||
|
/// Input parameters for the Y (luma), U and V (chroma) planes, and for the interleaved YUYV stream
|
||||||
|
ConversionBuffer src_Y, src_U, src_V, src_YUYV;
|
||||||
|
/// Output parameters for the conversion results
|
||||||
|
ConversionBuffer dst;
|
||||||
|
|
||||||
|
ResultCode SetInputLineWidth(u16 width);
|
||||||
|
ResultCode SetInputLines(u16 lines);
|
||||||
|
ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
|
||||||
|
};
|
||||||
|
|
||||||
class Interface : public Service::Interface {
|
class Interface : public Service::Interface {
|
||||||
public:
|
public:
|
||||||
Interface();
|
Interface();
|
||||||
|
|
369
src/core/hw/y2r.cpp
Normal file
369
src/core/hw/y2r.cpp
Normal file
|
@ -0,0 +1,369 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/color.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "common/math_util.h"
|
||||||
|
#include "common/vector_math.h"
|
||||||
|
|
||||||
|
#include "core/hle/service/y2r_u.h"
|
||||||
|
#include "core/memory.h"
|
||||||
|
|
||||||
|
namespace HW {
|
||||||
|
namespace Y2R {
|
||||||
|
|
||||||
|
using namespace Y2R_U;
|
||||||
|
|
||||||
|
static const size_t MAX_TILES = 1024 / 8;
|
||||||
|
static const size_t TILE_SIZE = 8 * 8;
|
||||||
|
using ImageTile = std::array<u32, TILE_SIZE>;
|
||||||
|
|
||||||
|
/// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles.
|
||||||
|
static void ConvertYUVToRGB(InputFormat input_format,
|
||||||
|
const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[],
|
||||||
|
unsigned int width, unsigned int height, const CoefficientSet& coefficients) {
|
||||||
|
|
||||||
|
for (unsigned int y = 0; y < height; ++y) {
|
||||||
|
for (unsigned int x = 0; x < width; ++x) {
|
||||||
|
s32 Y, U, V;
|
||||||
|
switch (input_format) {
|
||||||
|
case InputFormat::YUV422_Indiv8:
|
||||||
|
case InputFormat::YUV422_Indiv16:
|
||||||
|
Y = input_Y[y * width + x];
|
||||||
|
U = input_U[(y * width + x) / 2];
|
||||||
|
V = input_V[(y * width + x) / 2];
|
||||||
|
break;
|
||||||
|
case InputFormat::YUV420_Indiv8:
|
||||||
|
case InputFormat::YUV420_Indiv16:
|
||||||
|
Y = input_Y[y * width + x];
|
||||||
|
U = input_U[((y / 2) * width + x) / 2];
|
||||||
|
V = input_V[((y / 2) * width + x) / 2];
|
||||||
|
break;
|
||||||
|
case InputFormat::YUYV422_Interleaved:
|
||||||
|
Y = input_Y[(y * width + x) * 2];
|
||||||
|
U = input_Y[(y * width + (x / 2) * 2) * 2 + 1];
|
||||||
|
V = input_Y[(y * width + (x / 2) * 2) * 2 + 3];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This conversion process is bit-exact with hardware, as far as could be tested.
|
||||||
|
auto& c = coefficients;
|
||||||
|
s32 cY = c[0]*Y;
|
||||||
|
|
||||||
|
s32 r = cY + c[1]*V;
|
||||||
|
s32 g = cY - c[3]*U - c[2]*V;
|
||||||
|
s32 b = cY + c[4]*U;
|
||||||
|
|
||||||
|
const s32 rounding_offset = 0x18;
|
||||||
|
r = (r >> 3) + c[5] + rounding_offset;
|
||||||
|
g = (g >> 3) + c[6] + rounding_offset;
|
||||||
|
b = (b >> 3) + c[7] + rounding_offset;
|
||||||
|
|
||||||
|
unsigned int tile = x / 8;
|
||||||
|
unsigned int tile_x = x % 8;
|
||||||
|
u32* out = &output[tile][y * 8 + tile_x];
|
||||||
|
|
||||||
|
using MathUtil::Clamp;
|
||||||
|
*out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) |
|
||||||
|
((u32)Clamp(g >> 5, 0, 0xFF) << 16) |
|
||||||
|
((u32)Clamp(b >> 5, 0, 0xFF) << 8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit.
|
||||||
|
template <size_t N>
|
||||||
|
static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) {
|
||||||
|
const u8* input = Memory::GetPointer(buf.address);
|
||||||
|
|
||||||
|
size_t output_unit = buf.transfer_unit / N;
|
||||||
|
ASSERT(amount_of_data % output_unit == 0);
|
||||||
|
|
||||||
|
while (amount_of_data > 0) {
|
||||||
|
for (size_t i = 0; i < output_unit; ++i) {
|
||||||
|
output[i] = input[i * N];
|
||||||
|
}
|
||||||
|
|
||||||
|
output += output_unit;
|
||||||
|
input += buf.transfer_unit + buf.gap;
|
||||||
|
|
||||||
|
buf.address += buf.transfer_unit + buf.gap;
|
||||||
|
buf.image_size -= buf.transfer_unit;
|
||||||
|
amount_of_data -= output_unit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer.
|
||||||
|
static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data,
|
||||||
|
OutputFormat output_format, u8 alpha) {
|
||||||
|
|
||||||
|
u8* output = Memory::GetPointer(buf.address);
|
||||||
|
|
||||||
|
while (amount_of_data > 0) {
|
||||||
|
u8* unit_end = output + buf.transfer_unit;
|
||||||
|
while (output < unit_end) {
|
||||||
|
u32 color = *input++;
|
||||||
|
Math::Vec4<u8> col_vec{
|
||||||
|
(color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha,
|
||||||
|
};
|
||||||
|
|
||||||
|
switch (output_format) {
|
||||||
|
case OutputFormat::RGBA8:
|
||||||
|
Color::EncodeRGBA8(col_vec, output);
|
||||||
|
output += 4;
|
||||||
|
break;
|
||||||
|
case OutputFormat::RGB8:
|
||||||
|
Color::EncodeRGB8(col_vec, output);
|
||||||
|
output += 3;
|
||||||
|
break;
|
||||||
|
case OutputFormat::RGB5A1:
|
||||||
|
Color::EncodeRGB5A1(col_vec, output);
|
||||||
|
output += 2;
|
||||||
|
break;
|
||||||
|
case OutputFormat::RGB565:
|
||||||
|
Color::EncodeRGB565(col_vec, output);
|
||||||
|
output += 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
amount_of_data -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
output += buf.gap;
|
||||||
|
buf.address += buf.transfer_unit + buf.gap;
|
||||||
|
buf.image_size -= buf.transfer_unit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Identity mapping: entry `i` is `i`, so pixels of a tile keep their row-major position.
static const u8 linear_lut[64] = {
     0,  1,  2,  3,  4,  5,  6,  7,
     8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63,
};
|
||||||
|
|
||||||
|
/// Maps the row-major index `y * 8 + x` of a pixel inside an 8x8 tile to its swizzled position,
/// obtained by interleaving the bits of x (even bit positions) and y (odd bit positions).
static const u8 morton_lut[64] = {
     0,  1,  4,  5, 16, 17, 20, 21,
     2,  3,  6,  7, 18, 19, 22, 23,
     8,  9, 12, 13, 24, 25, 28, 29,
    10, 11, 14, 15, 26, 27, 30, 31,
    32, 33, 36, 37, 48, 49, 52, 53,
    34, 35, 38, 39, 50, 51, 54, 55,
    40, 41, 44, 45, 56, 57, 60, 61,
    42, 43, 46, 47, 58, 59, 62, 63,
};
|
||||||
|
|
||||||
|
static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
|
||||||
|
for (int i = 0; i < height * 8; ++i) {
|
||||||
|
output[out_map[i]] = input[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
|
||||||
|
int out_i = 0;
|
||||||
|
for (int x = 0; x < 8; ++x) {
|
||||||
|
for (int y = height - 1; y >= 0; --y) {
|
||||||
|
output[out_map[out_i++]] = input[y * 8 + x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
|
||||||
|
int out_i = 0;
|
||||||
|
for (int i = height * 8 - 1; i >= 0; --i) {
|
||||||
|
output[out_map[out_i++]] = input[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) {
|
||||||
|
int out_i = 0;
|
||||||
|
for (int x = 8-1; x >= 0; --x) {
|
||||||
|
for (int y = 0; y < height; ++y) {
|
||||||
|
output[out_map[out_i++]] = input[y * 8 + x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies the first `height` lines of an 8-pixel wide tile into `output`, where consecutive lines
/// of the destination are `line_stride` pixels apart.
static void WriteTileToOutput(u32* output, const ImageTile& tile, int height, int line_stride) {
    const int pixel_count = height * 8;
    for (int pixel = 0; pixel < pixel_count; ++pixel) {
        const int line = pixel / 8;
        const int column = pixel % 8;
        output[line * line_stride + column] = tile[pixel];
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a Y2R colorspace conversion.
|
||||||
|
*
|
||||||
|
* The Y2R hardware implements hardware-accelerated YUV to RGB colorspace conversions. It is most
|
||||||
|
* commonly used for video playback or to display camera input to the screen.
|
||||||
|
*
|
||||||
|
* The conversion process is quite configurable, and can be divided in distinct steps. From
|
||||||
|
* observation, it appears that the hardware buffers a single 8-pixel tall strip of image data
|
||||||
|
* internally and converts it in one go before writing to the output and loading the next strip.
|
||||||
|
*
|
||||||
|
* The steps taken to convert one strip of image data are:
|
||||||
|
*
|
||||||
|
* - The hardware receives data via CDMA (http://3dbrew.org/wiki/Corelink_DMA_Engines), which is
|
||||||
|
* presumably stored in one or more internal buffers. This process can be done in several separate
|
||||||
|
* transfers, as long as they don't exceed the size of the internal image buffer. This allows
|
||||||
|
* flexibility in input strides.
|
||||||
|
* - The input data is decoded into a YUV tuple. Several formats are suported, see the `InputFormat`
|
||||||
|
* enum.
|
||||||
|
* - The YUV tuple is converted, using fixed point calculations, to RGB. This step can be configured
|
||||||
|
* using a set of coefficients to support different colorspace standards. See `CoefficientSet`.
|
||||||
|
* - The strip can be optionally rotated 90, 180 or 270 degrees. Since each strip is processed
|
||||||
|
* independently, this notably rotates each *strip*, not the entire image. This means that for 90
|
||||||
|
* or 270 degree rotations, the output will be in terms of several 8 x height images, and for any
|
||||||
|
* non-zero rotation the strips will have to be re-arranged so that the parts of the image will
|
||||||
|
* not be shuffled together. This limitation makes this a feature of somewhat dubious utility. 90
|
||||||
|
* or 270 degree rotations in images with non-even height don't seem to work properly.
|
||||||
|
* - The data is converted to the output RGB format. See the `OutputFormat` enum.
|
||||||
|
* - The data can be output either linearly line-by-line or in the swizzled 8x8 tile format used by
|
||||||
|
* the PICA. This is decided by the `BlockAlignment` enum. If 8x8 alignment is used, then the
|
||||||
|
* image must have a height divisible by 8. The image width must always be divisible by 8.
|
||||||
|
* - The final data is then CDMAed out to main memory and the next image strip is processed. This
|
||||||
|
* offers the same flexibility as the input stage.
|
||||||
|
*
|
||||||
|
* In this implementation, to avoid the combinatorial explosion of parameter combinations, common
|
||||||
|
* intermediate formats are used and where possible tables or parameters are used instead of
|
||||||
|
* diverging code paths to keep the amount of branches in check. Some steps are also merged to
|
||||||
|
* increase efficiency.
|
||||||
|
*
|
||||||
|
* Output for all valid settings combinations matches hardware, however output in some edge-cases
|
||||||
|
* differs:
|
||||||
|
*
|
||||||
|
* - `Block8x8` alignment with non-mod8 height produces different garbage patterns on the last
|
||||||
|
* strip, especially when combined with rotation.
|
||||||
|
* - Hardware, when using `Linear` alignment with a non-even height and 90 or 270 degree rotation
|
||||||
|
* produces misaligned output on the last strip. This implmentation produces output with the
|
||||||
|
* correct "expected" alignment.
|
||||||
|
*
|
||||||
|
* Hardware behaves strangely (doesn't fire the completion interrupt, for example) in these cases,
|
||||||
|
* so they are believed to be invalid configurations anyway.
|
||||||
|
*/
|
||||||
|
void PerformConversion(ConversionConfiguration& cvt) {
|
||||||
|
ASSERT(cvt.input_line_width % 8 == 0);
|
||||||
|
ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0);
|
||||||
|
// Tiles per row
|
||||||
|
size_t num_tiles = cvt.input_line_width / 8;
|
||||||
|
ASSERT(num_tiles < MAX_TILES);
|
||||||
|
|
||||||
|
// Buffer used as a CDMA source/target.
|
||||||
|
std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]);
|
||||||
|
// Intermediate storage for decoded 8x8 image tiles. Always stored as RGB32.
|
||||||
|
std::unique_ptr<ImageTile[]> tiles(new ImageTile[num_tiles]);
|
||||||
|
ImageTile tmp_tile;
|
||||||
|
|
||||||
|
// LUT used to remap writes to a tile. Used to allow linear or swizzled output without
|
||||||
|
// requiring two different code paths.
|
||||||
|
const u8* tile_remap;
|
||||||
|
switch (cvt.block_alignment) {
|
||||||
|
case BlockAlignment::Linear:
|
||||||
|
tile_remap = linear_lut; break;
|
||||||
|
case BlockAlignment::Block8x8:
|
||||||
|
tile_remap = morton_lut; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned int y = 0; y < cvt.input_lines; y += 8) {
|
||||||
|
unsigned int row_height = std::min(cvt.input_lines - y, 8u);
|
||||||
|
|
||||||
|
// Total size in pixels of incoming data required for this strip.
|
||||||
|
const size_t row_data_size = row_height * cvt.input_line_width;
|
||||||
|
|
||||||
|
u8* input_Y = data_buffer.get();
|
||||||
|
u8* input_U = input_Y + 8 * cvt.input_line_width;
|
||||||
|
u8* input_V = input_U + 8 * cvt.input_line_width / 2;
|
||||||
|
|
||||||
|
switch (cvt.input_format) {
|
||||||
|
case InputFormat::YUV422_Indiv8:
|
||||||
|
ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
|
||||||
|
ReceiveData<1>(input_U, cvt.src_U, row_data_size / 2);
|
||||||
|
ReceiveData<1>(input_V, cvt.src_V, row_data_size / 2);
|
||||||
|
break;
|
||||||
|
case InputFormat::YUV420_Indiv8:
|
||||||
|
ReceiveData<1>(input_Y, cvt.src_Y, row_data_size);
|
||||||
|
ReceiveData<1>(input_U, cvt.src_U, row_data_size / 4);
|
||||||
|
ReceiveData<1>(input_V, cvt.src_V, row_data_size / 4);
|
||||||
|
break;
|
||||||
|
case InputFormat::YUV422_Indiv16:
|
||||||
|
ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
|
||||||
|
ReceiveData<2>(input_U, cvt.src_U, row_data_size / 2);
|
||||||
|
ReceiveData<2>(input_V, cvt.src_V, row_data_size / 2);
|
||||||
|
break;
|
||||||
|
case InputFormat::YUV420_Indiv16:
|
||||||
|
ReceiveData<2>(input_Y, cvt.src_Y, row_data_size);
|
||||||
|
ReceiveData<2>(input_U, cvt.src_U, row_data_size / 4);
|
||||||
|
ReceiveData<2>(input_V, cvt.src_V, row_data_size / 4);
|
||||||
|
break;
|
||||||
|
case InputFormat::YUYV422_Interleaved:
|
||||||
|
input_U = nullptr;
|
||||||
|
input_V = nullptr;
|
||||||
|
ReceiveData<1>(input_Y, cvt.src_YUYV, row_data_size * 2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note(yuriks): If additional optimization is required, input_format can be moved to a
|
||||||
|
// template parameter, so that its dispatch can be moved to outside the inner loop.
|
||||||
|
ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(),
|
||||||
|
cvt.input_line_width, row_height, cvt.coefficients);
|
||||||
|
|
||||||
|
u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get());
|
||||||
|
|
||||||
|
for (int i = 0; i < num_tiles; ++i) {
|
||||||
|
int image_strip_width, output_stride;
|
||||||
|
|
||||||
|
switch (cvt.rotation) {
|
||||||
|
case Rotation::None:
|
||||||
|
RotateTile0(tiles[i], tmp_tile, row_height, tile_remap);
|
||||||
|
image_strip_width = cvt.input_line_width;
|
||||||
|
output_stride = 8;
|
||||||
|
break;
|
||||||
|
case Rotation::Clockwise_90:
|
||||||
|
RotateTile90(tiles[i], tmp_tile, row_height, tile_remap);
|
||||||
|
image_strip_width = 8;
|
||||||
|
output_stride = 8 * row_height;
|
||||||
|
break;
|
||||||
|
case Rotation::Clockwise_180:
|
||||||
|
// For 180 and 270 degree rotations we also invert the order of tiles in the strip,
|
||||||
|
// since the rotates are done individually on each tile.
|
||||||
|
RotateTile180(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
|
||||||
|
image_strip_width = cvt.input_line_width;
|
||||||
|
output_stride = 8;
|
||||||
|
break;
|
||||||
|
case Rotation::Clockwise_270:
|
||||||
|
RotateTile270(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap);
|
||||||
|
image_strip_width = 8;
|
||||||
|
output_stride = 8 * row_height;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (cvt.block_alignment) {
|
||||||
|
case BlockAlignment::Linear:
|
||||||
|
WriteTileToOutput(output_buffer, tmp_tile, row_height, image_strip_width);
|
||||||
|
output_buffer += output_stride;
|
||||||
|
break;
|
||||||
|
case BlockAlignment::Block8x8:
|
||||||
|
WriteTileToOutput(output_buffer, tmp_tile, 8, 8);
|
||||||
|
output_buffer += TILE_SIZE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note(yuriks): If additional optimization is required, output_format can be moved to a
|
||||||
|
// template parameter, so that its dispatch can be moved to outside the inner loop.
|
||||||
|
SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
15
src/core/hw/y2r.h
Normal file
15
src/core/hw/y2r.h
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
// Copyright 2015 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once

namespace Y2R_U {
/// Conversion parameters. Only forward-declared here, so this header can be included without
/// the full definition.
struct ConversionConfiguration;
}

namespace HW {
namespace Y2R {

/**
 * Performs the Y2R colorspace conversion described by `cvt`.
 * @param cvt Conversion parameters; taken by non-const reference.
 */
void PerformConversion(Y2R_U::ConversionConfiguration& cvt);

}
}
|
Loading…
Reference in a new issue