diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 7b479b569..649640e72 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -45,6 +45,7 @@ static std::shared_ptr<Logger> global_logger;
         SUB(Service, SOC) \
         CLS(HW) \
         SUB(HW, Memory) \
+        SUB(HW, LCD) \
         SUB(HW, GPU) \
         CLS(Frontend) \
         CLS(Render) \
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 7b67b3c07..83d64145b 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -65,6 +65,7 @@ enum class Class : ClassType {
     Service_SOC,                ///< The SOC (Socket) service
     HW,                         ///< Low-level hardware emulation
     HW_Memory,                  ///< Memory-map and address translation
+    HW_LCD,                     ///< LCD register emulation
     HW_GPU,                     ///< GPU control emulation
     Frontend,                   ///< Emulator UI
     Render,                     ///< Emulator video output and hardware acceleration
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 212da25c5..33e5be3a4 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -87,6 +87,7 @@ set(SRCS
             hle/svc.cpp
             hw/gpu.cpp
             hw/hw.cpp
+            hw/lcd.cpp
             loader/elf.cpp
             loader/loader.cpp
             loader/ncch.cpp
@@ -196,6 +197,7 @@ set(HEADERS
             hle/svc.h
             hw/gpu.h
             hw/hw.h
+            hw/lcd.h
             loader/elf.h
             loader/loader.h
             loader/ncch.h
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index c23cfa3c8..cff585698 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -7,14 +7,20 @@
 #include "core/mem_map.h"
 #include "core/hle/kernel/event.h"
 #include "core/hle/kernel/shared_memory.h"
+#include "core/hle/result.h"
 #include "gsp_gpu.h"
+#include "core/hw/hw.h"
 #include "core/hw/gpu.h"
+#include "core/hw/lcd.h"
 
 #include "video_core/gpu_debugger.h"
 
 // Main graphics debugger object - TODO: Here is probably not the best place for this
 GraphicsDebugger g_debugger;
 
+// Beginning address of HW regs
+const static u32 REGS_BEGIN = 0x1EB00000;
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Namespace GSP_GPU
 
@@ -85,7 +91,7 @@ static void WriteHWRegs(u32 base_address, u32 size_in_bytes, const u32* data) {
         return;
 
     while (size_in_bytes > 0) {
-        GPU::Write<u32>(base_address + 0x1EB00000, *data);
+        HW::Write<u32>(base_address + REGS_BEGIN, *data);
 
         size_in_bytes -= 4;
         ++data;
@@ -128,15 +134,15 @@ static void WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, const u32*
         return;
 
     while (size_in_bytes > 0) {
-        const u32 reg_address = base_address + 0x1EB00000;
+        const u32 reg_address = base_address + REGS_BEGIN;
 
         u32 reg_value;
-        GPU::Read<u32>(reg_value, reg_address);
+        HW::Read<u32>(reg_value, reg_address);
 
         // Update the current value of the register only for set mask bits
         reg_value = (reg_value & ~*masks) | (*data | *masks);
 
-        GPU::Write<u32>(reg_address, reg_value);
+        HW::Write<u32>(reg_address, reg_value);
 
         size_in_bytes -= 4;
         ++data;
@@ -188,7 +194,7 @@ static void ReadHWRegs(Service::Interface* self) {
     u32* dst = (u32*)Memory::GetPointer(cmd_buff[0x41]);
 
     while (size > 0) {
-        GPU::Read<u32>(*dst, reg_addr + 0x1EB00000);
+        HW::Read<u32>(*dst, reg_addr + REGS_BEGIN);
 
         size -= 4;
         ++dst;
@@ -427,6 +433,32 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
     }
 }
 
+/**
+ * GSP_GPU::SetLcdForceBlack service function
+ *
+ * Turns the black color fill (REG_LCDCOLORFILL) on or off for both LCDs.
+ *
+ *  Inputs:
+ *      1: Black color fill flag (0 = don't fill, !0 = fill)
+ *  Outputs:
+ *      1: Result code
+ */
+static void SetLcdForceBlack(Service::Interface* self) {
+    u32* const cmd_buff = Kernel::GetCommandBuffer();
+
+    // Zero-initialising the register leaves all colour channels at 0 (black),
+    // so only the enable bit has to be filled in from the request.
+    LCD::Regs::ColorFill fill = {0};
+    fill.is_enabled = (cmd_buff[1] != 0);
+
+    const u32 top_reg_addr = HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top);
+    const u32 bottom_reg_addr = HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom);
+    LCD::Write(top_reg_addr, fill.raw);    // Top LCD
+    LCD::Write(bottom_reg_addr, fill.raw); // Bottom LCD
+
+    cmd_buff[1] = RESULT_SUCCESS.raw;
+}
+
 /// This triggers handling of the GX command written to the command buffer in shared memory.
 static void TriggerCmdReqQueue(Service::Interface* self) {
     // Iterate through each thread's command queue...
@@ -460,7 +492,7 @@ const Interface::FunctionInfo FunctionTable[] = {
     {0x00080082, FlushDataCache,                "FlushDataCache"},
     {0x00090082, nullptr,                       "InvalidateDataCache"},
     {0x000A0044, nullptr,                       "RegisterInterruptEvents"},
-    {0x000B0040, nullptr,                       "SetLcdForceBlack"},
+    {0x000B0040, SetLcdForceBlack,              "SetLcdForceBlack"},
     {0x000C0000, TriggerCmdReqQueue,            "TriggerCmdReqQueue"},
     {0x000D0140, nullptr,                       "SetDisplayTransfer"},
     {0x000E0180, nullptr,                       "SetTextureCopy"},
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index b7102b874..30318fc06 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -15,12 +15,13 @@
 #include "core/hle/service/gsp_gpu.h"
 #include "core/hle/service/dsp_dsp.h"
 
+#include "core/hw/hw.h"
 #include "core/hw/gpu.h"
 
 #include "video_core/command_processor.h"
 #include "video_core/utils.h"
 #include "video_core/video_core.h"
-#include <video_core/color.h>
+#include "video_core/color.h"
 
 namespace GPU {
 
@@ -40,7 +41,7 @@ static bool last_skip_frame = false;
 
 template <typename T>
 inline void Read(T &var, const u32 raw_addr) {
-    u32 addr = raw_addr - 0x1EF00000;
+    u32 addr = raw_addr - HW::VADDR_GPU;
     u32 index = addr / 4;
 
     // Reads other than u32 are untested, so I'd rather have them abort than silently fail
@@ -54,7 +55,7 @@ inline void Read(T &var, const u32 raw_addr) {
 
 template <typename T>
 inline void Write(u32 addr, const T data) {
-    addr -= 0x1EF00000;
+    addr -= HW::VADDR_GPU;
     u32 index = addr / 4;
 
     // Writes other than u32 are untested, so I'd rather have them abort than silently fail
@@ -313,8 +314,6 @@ void Init() {
     framebuffer_top.address_right2 = 0x182B9800;
     framebuffer_sub.address_left1  = 0x1848F000;
     framebuffer_sub.address_left2  = 0x184C7800;
-    //framebuffer_sub.address_right1 = unknown;
-    //framebuffer_sub.address_right2 = unknown;
 
     framebuffer_top.width = 240;
     framebuffer_top.height = 400;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 3e81f03ef..5b7f0a4e9 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -246,6 +246,8 @@ struct Regs {
         return content[index];
     }
 
+#undef ASSERT_MEMBER_SIZE
+
 private:
     /*
      * Most physical addresses which GPU registers refer to are 8-byte aligned.
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index a63ba6eeb..bed50af50 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -6,43 +6,19 @@
 
 #include "core/hw/hw.h"
 #include "core/hw/gpu.h"
+#include "core/hw/lcd.h"
 
 namespace HW {
 
-enum {
-    VADDR_HASH      = 0x1EC01000,
-    VADDR_CSND      = 0x1EC03000,
-    VADDR_DSP       = 0x1EC40000,
-    VADDR_PDN       = 0x1EC41000,
-    VADDR_CODEC     = 0x1EC41000,
-    VADDR_SPI       = 0x1EC42000,
-    VADDR_SPI_2     = 0x1EC43000,   // Only used under TWL_FIRM?
-    VADDR_I2C       = 0x1EC44000,
-    VADDR_CODEC_2   = 0x1EC45000,
-    VADDR_HID       = 0x1EC46000,
-    VADDR_PAD       = 0x1EC46000,
-    VADDR_PTM       = 0x1EC46000,
-    VADDR_GPIO      = 0x1EC47000,
-    VADDR_I2C_2     = 0x1EC48000,
-    VADDR_SPI_3     = 0x1EC60000,
-    VADDR_I2C_3     = 0x1EC61000,
-    VADDR_MIC       = 0x1EC62000,
-    VADDR_PXI       = 0x1EC63000,   // 0xFFFD2000
-    //VADDR_NTRCARD
-    VADDR_CDMA      = 0xFFFDA000,   // CoreLink DMA-330? Info
-    VADDR_DSP_2     = 0x1ED03000,
-    VADDR_HASH_2    = 0x1EE01000,
-    VADDR_GPU       = 0x1EF00000,
-};
-
 template <typename T>
 inline void Read(T &var, const u32 addr) {
     switch (addr & 0xFFFFF000) {
-
     case VADDR_GPU:
         GPU::Read(var, addr);
         break;
-
+    case VADDR_LCD: // 4 KiB page starting at the LCD register base
+        LCD::Read(var, addr); // fills var from the LCD register block
+        break;
     default:
         LOG_ERROR(HW_Memory, "unknown Read%lu @ 0x%08X", sizeof(var) * 8, addr);
     }
@@ -51,11 +27,12 @@ inline void Read(T &var, const u32 addr) {
 template <typename T>
 inline void Write(u32 addr, const T data) {
     switch (addr & 0xFFFFF000) {
-
     case VADDR_GPU:
         GPU::Write(addr, data);
         break;
-
+    case VADDR_LCD: // 4 KiB page starting at the LCD register base
+        LCD::Write(addr, data);
+        break;
     default:
         LOG_ERROR(HW_Memory, "unknown Write%lu 0x%08X @ 0x%08X", sizeof(data) * 8, (u32)data, addr);
     }
@@ -80,6 +57,7 @@ void Update() {
 /// Initialize hardware
 void Init() {
     GPU::Init();
+    LCD::Init();
     LOG_DEBUG(HW, "initialized OK");
 }
 
diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h
index 991c0a07d..d65608910 100644
--- a/src/core/hw/hw.h
+++ b/src/core/hw/hw.h
@@ -8,6 +8,30 @@
 
 namespace HW {
 
+/// Start addresses of the IO register regions, as seen in the user virtual address space.
+enum : u32 {
+    VADDR_HASH      = 0x1EC01000,
+    VADDR_CSND      = 0x1EC03000,
+    VADDR_DSP       = 0x1EC40000,
+    VADDR_PDN       = 0x1EC41000,
+    VADDR_CODEC     = 0x1EC41000,   // Same value as VADDR_PDN
+    VADDR_SPI       = 0x1EC42000,
+    VADDR_SPI_2     = 0x1EC43000,   // Only used under TWL_FIRM?
+    VADDR_I2C       = 0x1EC44000,
+    VADDR_CODEC_2   = 0x1EC45000,
+    VADDR_HID       = 0x1EC46000,
+    VADDR_GPIO      = 0x1EC47000,
+    VADDR_I2C_2     = 0x1EC48000,
+    VADDR_SPI_3     = 0x1EC60000,
+    VADDR_I2C_3     = 0x1EC61000,
+    VADDR_MIC       = 0x1EC62000,
+    VADDR_PXI       = 0x1EC63000,
+    VADDR_LCD       = 0x1ED02000,   // Base subtracted by LCD::Read/LCD::Write (core/hw/lcd.cpp)
+    VADDR_DSP_2     = 0x1ED03000,
+    VADDR_HASH_2    = 0x1EE01000,
+    VADDR_GPU       = 0x1EF00000,   // Base subtracted by GPU::Read/GPU::Write (core/hw/gpu.cpp)
+};
+
 template <typename T>
 void Read(T &var, const u32 addr);
 
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
new file mode 100644
index 000000000..7986f3ddb
--- /dev/null
+++ b/src/core/hw/lcd.cpp
@@ -0,0 +1,66 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+
+#include "core/arm/arm_interface.h"
+#include "core/hle/hle.h"
+#include "core/hw/hw.h"
+#include "core/hw/lcd.h"
+
+namespace LCD {
+
+Regs g_regs;
+
+template <typename T>
+inline void Read(T &var, const u32 raw_addr) {
+    const u32 reg_offset = raw_addr - HW::VADDR_LCD; // byte offset into the LCD register block
+    const u32 word_index = reg_offset / 4;           // registers are addressed as 32-bit words
+
+    // Only in-range u32 accesses are serviced; anything else is logged and var is left untouched
+    const bool is_supported = std::is_same<T, u32>::value && word_index < 0x400;
+    if (!is_supported) {
+        LOG_ERROR(HW_LCD, "unknown Read%lu @ 0x%08X", sizeof(var) * 8, reg_offset);
+        return;
+    }
+    var = g_regs[word_index];
+}
+
+template <typename T>
+inline void Write(u32 addr, const T data) {
+    const u32 reg_offset = addr - HW::VADDR_LCD; // byte offset into the LCD register block
+    const u32 word_index = reg_offset / 4;       // registers are addressed as 32-bit words
+
+    // Only in-range u32 accesses are serviced; anything else is logged and dropped
+    const bool is_supported = std::is_same<T, u32>::value && word_index < 0x400;
+    if (!is_supported) {
+        LOG_ERROR(HW_LCD, "unknown Write%lu 0x%08X @ 0x%08X", sizeof(data) * 8, (u32)data, reg_offset);
+        return;
+    }
+    g_regs[word_index] = static_cast<u32>(data);
+}
+
+// Read/Write are defined in this .cpp rather than in the header, so every access width is instantiated explicitly here:
+
+template void Read<u64>(u64 &var, const u32 addr);
+template void Read<u32>(u32 &var, const u32 addr);
+template void Read<u16>(u16 &var, const u32 addr);
+template void Read<u8>(u8 &var, const u32 addr);
+
+template void Write<u64>(u32 addr, const u64 data);
+template void Write<u32>(u32 addr, const u32 data);
+template void Write<u16>(u32 addr, const u16 data);
+template void Write<u8>(u32 addr, const u8 data);
+
+/// Initialize the LCD register emulation; at present this only emits a debug log line
+void Init() {
+    LOG_DEBUG(HW_LCD, "initialized OK");
+}
+
+/// Shut down the LCD register emulation; at present this only emits a debug log line
+void Shutdown() {
+    LOG_DEBUG(HW_LCD, "shutdown OK");
+}
+    
+} // namespace
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
new file mode 100644
index 000000000..43893a625
--- /dev/null
+++ b/src/core/hw/lcd.h
@@ -0,0 +1,88 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include "common/common_types.h"
+#include "common/bit_field.h"
+
+#define LCD_REG_INDEX(field_name) (offsetof(LCD::Regs, field_name) / sizeof(u32))
+
+namespace LCD {
+
+struct Regs {
+    /// Layout of a color fill register: an RGB color plus an enable flag, all within one u32
+    union ColorFill {
+        u32 raw; ///< The whole register as a single 32-bit word
+
+        BitField<0, 8, u32> color_r; ///< Red channel
+        BitField<8, 8, u32> color_g; ///< Green channel
+        BitField<16, 8, u32> color_b; ///< Blue channel
+        BitField<24, 1, u32> is_enabled; ///< Non-zero when the screen should show the fill color
+    };
+
+    INSERT_PADDING_WORDS(0x81);
+    ColorFill color_fill_top; ///< Word index 0x81
+    INSERT_PADDING_WORDS(0xE);
+    u32 backlight_top; ///< Word index 0x90
+
+    INSERT_PADDING_WORDS(0x1F0);
+
+    ColorFill color_fill_bottom; ///< Word index 0x281
+    INSERT_PADDING_WORDS(0xE);
+    u32 backlight_bottom; ///< Word index 0x290
+    INSERT_PADDING_WORDS(0x16F);
+    /// Number of 32-bit words covered by this structure
+    static inline size_t NumIds() {
+        return sizeof(Regs) / sizeof(u32);
+    }
+    /// Views the structure as an array of u32 words; index is not bounds-checked here
+    u32& operator [] (int index) const {
+        u32* content = (u32*)this;
+        return content[index];
+    }
+    /// Views the structure as an array of u32 words; index is not bounds-checked here
+    u32& operator [] (int index) {
+        u32* content = (u32*)this;
+        return content[index];
+    }
+
+#undef ASSERT_MEMBER_SIZE // NOTE(review): not defined anywhere in this header - looks like a leftover, confirm
+
+};
+static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
+
+// Compile-time checks that each register sits at its expected 32-bit word index within Regs.
+// TODO: Skipped on MSVC, which does not support offsetof() on non-static data members even
+//       though C++11 allows it. Enable there once MSVC adds support.
+#ifndef _MSC_VER
+#define ASSERT_REG_POSITION(field_name, position) \
+    static_assert(offsetof(Regs, field_name) == position * 4, \
+              "Field "#field_name" has invalid position")
+
+ASSERT_REG_POSITION(color_fill_top,    0x81);
+ASSERT_REG_POSITION(backlight_top,     0x90);
+ASSERT_REG_POSITION(color_fill_bottom, 0x281);
+ASSERT_REG_POSITION(backlight_bottom,  0x290);
+
+#undef ASSERT_REG_POSITION
+#endif // !defined(_MSC_VER)
+
+extern Regs g_regs;
+
+template <typename T>
+void Read(T &var, const u32 addr);
+
+template <typename T>
+void Write(u32 addr, const T data);
+
+/// Initialize hardware
+void Init();
+
+/// Shutdown hardware
+void Shutdown();
+    
+} // namespace
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 95ab96340..4273a177f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -3,6 +3,8 @@
 // Refer to the license.txt file included.
 
 #include "core/hw/gpu.h"
+#include "core/hw/hw.h"
+#include "core/hw/lcd.h"
 #include "core/mem_map.h"
 
 #include "common/emu_window.h"
@@ -64,16 +66,33 @@ void RendererOpenGL::SwapBuffers() {
     for(int i : {0, 1}) {
         const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
 
-        if (textures[i].width != (GLsizei)framebuffer.width ||
-            textures[i].height != (GLsizei)framebuffer.height ||
-            textures[i].format != framebuffer.color_format) {
-            // Reallocate texture if the framebuffer size has changed.
-            // This is expected to not happen very often and hence should not be a
-            // performance problem.
-            ConfigureFramebufferTexture(textures[i], framebuffer);
-        }
+        // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
+        u32 lcd_color_addr = (i == 0) ? LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom);
+        lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr;
+        LCD::Regs::ColorFill color_fill = {0};
+        LCD::Read(color_fill.raw, lcd_color_addr);
 
-        LoadFBToActiveGLTexture(GPU::g_regs.framebuffer_config[i], textures[i]);
+        if (color_fill.is_enabled) {
+            LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]);
+
+            // Resize the texture in case the framebuffer size has changed
+            textures[i].width = 1;
+            textures[i].height = 1;
+        } else {
+            if (textures[i].width != (GLsizei)framebuffer.width ||
+                textures[i].height != (GLsizei)framebuffer.height ||
+                textures[i].format != framebuffer.color_format) {
+                // Reallocate texture if the framebuffer size has changed.
+                // This is expected to not happen very often and hence should not be a
+                // performance problem.
+                ConfigureFramebufferTexture(textures[i], framebuffer);
+            }
+            LoadFBToActiveGLTexture(framebuffer, textures[i]);
+
+            // Resize the texture in case the framebuffer size has changed
+            textures[i].width = framebuffer.width;
+            textures[i].height = framebuffer.height;
+        }
     }
 
     DrawScreens();
@@ -127,10 +146,25 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
     // TODO: Applications could theoretically crash Citra here by specifying too large
     //       framebuffer sizes. We should make sure that this cannot happen.
     glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
-        texture.gl_format, texture.gl_type, framebuffer_data);
+                    texture.gl_format, texture.gl_type, framebuffer_data);
 
     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+    glBindTexture(GL_TEXTURE_2D, 0);
+}
 
+/**
+ * Replaces the contents of the given texture with a single texel of the requested RGB color.
+ * A 1x1 texture is enough for a solid color, since it stretches across whatever it is drawn on,
+ * and it means only three bytes of pixel data are handed to OpenGL.
+ */
+void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
+                                                const TextureInfo& texture) {
+    // One RGB texel, one byte per channel
+    const u8 texel[3] = { color_r, color_g, color_b };
+
+    glBindTexture(GL_TEXTURE_2D, texture.handle);
+    // Specify mip level 0 as a 1x1 GL_RGB image sourced from the texel above
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, texel);
     glBindTexture(GL_TEXTURE_2D, 0);
 }
 
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index bcabab557..cd782428e 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -58,6 +58,9 @@ private:
     // Loads framebuffer from emulated memory into the active OpenGL texture.
     static void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer,
                                         const TextureInfo& texture);
+    // Fills active OpenGL texture with the given RGB color.
+    static void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
+                                           const TextureInfo& texture);
 
     /// Computes the viewport rectangle
     MathUtil::Rectangle<unsigned> GetViewportExtent();
@@ -72,7 +75,7 @@ private:
     GLuint vertex_array_handle;
     GLuint vertex_buffer_handle;
     GLuint program_id;
-    std::array<TextureInfo, 2> textures;
+    std::array<TextureInfo, 2> textures;          ///< Textures for top and bottom screens respectively
     // Shader uniform location indices
     GLuint uniform_modelview_matrix;
     GLuint uniform_color_texture;