interpreter cache

This commit is contained in:
Bonta-kun 2016-04-26 00:41:21 +02:00
parent 0964a3ff53
commit 16df8e5034
6 changed files with 321 additions and 24 deletions

View file

@ -1,4 +1,5 @@
set(SRCS set(SRCS
arm/cache/cache.cpp
arm/disassembler/arm_disasm.cpp arm/disassembler/arm_disasm.cpp
arm/disassembler/load_symbol_map.cpp arm/disassembler/load_symbol_map.cpp
arm/dyncom/arm_dyncom.cpp arm/dyncom/arm_dyncom.cpp
@ -127,6 +128,7 @@ set(SRCS
set(HEADERS set(HEADERS
arm/arm_interface.h arm/arm_interface.h
arm/cache/cache.h
arm/disassembler/arm_disasm.h arm/disassembler/arm_disasm.h
arm/disassembler/load_symbol_map.h arm/disassembler/load_symbol_map.h
arm/dyncom/arm_dyncom.h arm/dyncom/arm_dyncom.h

155
src/core/arm/cache/cache.cpp vendored Normal file
View file

@ -0,0 +1,155 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/arm/cache/cache.h"
namespace Cache {
/**
 * Builds an empty cache and adds it to the global cache manager.
 * @param index_mode true to keep the per-block index bookkeeping (blocks_pc), false to only
 *                   store raw pointers
 * @param clearcb Callback run by subsequent Clear() calls; may be empty
 */
CacheBase::CacheBase(bool index_mode, OnClearCb clearcb) : index_mode(index_mode) {
    // No page is backed by a pointer table until code gets registered.
    for (auto& page_entry : page_pointers) {
        page_entry = nullptr;
    }

    // Reset the bookkeeping while no callback is installed yet, so the callback is not
    // invoked for this initial clear.
    Clear();
    OnClearCallback = clearcb;

    g_cachemanager.RegisterCache(this);
}
/// Removes this cache from the global cache manager so it no longer receives
/// code load/unload and clear notifications.
CacheBase::~CacheBase() {
g_cachemanager.UnregisterCache(this);
}
void CacheBase::Clear() {
if (OnClearCallback != nullptr) OnClearCallback();
for (auto& cache : ptr_caches) cache.data.assign(cache.data.size(), nullptr);
if (index_mode) {
blocks_pc.assign(MAX_BLOCKS, INVALID_BLOCK);
next_block = num_blocks = 0;
}
}
/**
 * Drops the entry cached for a single address.
 * @param pc Address whose entry should be removed
 * @return true if an entry existed and was removed, false otherwise
 */
bool CacheBase::RemoveBlock(u32 pc) {
    u8** const page = page_pointers[pc >> Memory::PAGE_BITS];
    if (page == nullptr) {
        // The page was never registered, so nothing can be cached for it.
        return false;
    }

    u8*& slot = page[pc & Memory::PAGE_MASK];
    if (slot == nullptr) {
        return false;
    }

    if (index_mode) {
        const u32 id = pointer_to_id(slot);
        ASSERT(blocks_pc[id] == pc);
        blocks_pc[id] = INVALID_BLOCK;
        // Keep next_block on the lowest free index.
        next_block = std::min(next_block, id);
        // Shrink the used prefix past any trailing free indices.
        while (num_blocks > 0 && blocks_pc[num_blocks - 1] == INVALID_BLOCK) {
            --num_blocks;
        }
    }

    slot = nullptr;
    return true;
}
/**
 * Drops every entry cached for an address in [start, end).
 * @param start First address of the range
 * @param end One past the last address of the range
 * @return true if at least one entry was removed, false otherwise
 */
bool CacheBase::RemoveRange(u32 start, u32 end) {
    bool result = false;
    for (auto& cache : ptr_caches) {
        // Clamp the requested range to this region. Addresses stay unsigned: a signed
        // counter would compare incorrectly against u32 values and could overflow when a
        // range crosses 0x7FFFFFFF.
        const u32 first = std::max(start, cache.addr);
        const u32 last = std::min(end, cache.addr_end);
        for (u32 pc = first; pc < last; ++pc) {
            u8*& slot = cache.data[pc - cache.addr];
            if (slot == nullptr) continue;
            if (index_mode) {
                const u32 id = pointer_to_id(slot);
                ASSERT(blocks_pc[id] == pc);
                blocks_pc[id] = INVALID_BLOCK;
                // Keep next_block on the lowest free index.
                if (id < next_block) next_block = id;
                // Shrink the used prefix past any trailing free indices.
                while (num_blocks > 0 && blocks_pc[num_blocks - 1] == INVALID_BLOCK) --num_blocks;
            }
            slot = nullptr;
            result = true;
        }
    }
    return result;
}
/**
 * Registers a page-aligned code region and creates an empty pointer table for it.
 * The table holds one slot per address of the region.
 * @param address Start of the region; must be page aligned
 * @param size Size of the region in bytes; must be a multiple of the page size
 */
void CacheBase::OnCodeLoad(u32 address, u32 size) {
    const u32 end = address + size;

    // Check there is no overlapping
    for (const BlockPtrCache& existing : ptr_caches) {
        ASSERT((address >= existing.addr_end) || (end <= existing.addr));
    }
    ASSERT((address & Memory::PAGE_MASK) == 0 && (size & Memory::PAGE_MASK) == 0);

    BlockPtrCache region{address, end};
    region.data.assign(size, nullptr);

    // Point every covered page at the first slot belonging to it. These pointers are taken
    // before the region is moved into ptr_caches and are expected to stay valid afterwards.
    for (u32 page_addr = address; page_addr < end; page_addr += Memory::PAGE_SIZE) {
        page_pointers[page_addr >> Memory::PAGE_BITS] = region.data.data() + (page_addr - address);
    }

    ptr_caches.emplace_back(std::move(region));
}
/**
 * Unregisters every code region that overlaps [address, address + size).
 * Overlapping regions are removed as a whole, even if only partially covered.
 * @param address Start of the range being unmapped
 * @param size Size of the range in bytes
 */
void CacheBase::OnCodeUnload(u32 address, u32 size) {
    const u32 end = address + size;
    const auto overlaps = [address, end](const BlockPtrCache& region) {
        return (address < region.addr_end) && (end > region.addr);
    };

    // First pass: release the entries and page mappings of every affected region. This is
    // done before the container is compacted because RemoveRange() walks ptr_caches itself
    // and must not observe elements that std::remove_if has already moved from.
    for (const BlockPtrCache& region : ptr_caches) {
        if (!overlaps(region)) continue;
        RemoveRange(region.addr, region.addr_end);
        for (u32 i = region.addr; i < region.addr_end; i += Memory::PAGE_SIZE) {
            page_pointers[i >> Memory::PAGE_BITS] = nullptr;
        }
    }

    // Second pass: drop the regions, using a predicate without side effects.
    ptr_caches.erase(std::remove_if(ptr_caches.begin(), ptr_caches.end(), overlaps),
                     ptr_caches.end());
}
/**
 * Returns a reference to the (currently null) slot for pc so the caller can fill it.
 * If pc lies in a page that was never registered, a one-page region is registered for it.
 * In index mode a free block index is reserved and the slot is pre-filled with the pointer
 * produced by id_to_pointer for that index.
 * @param pc Address to create an entry for; must not have an entry yet
 * @return Reference to the slot associated with pc
 */
u8*& CacheBase::GetNewPtr(u32 pc) {
    DEBUG_ASSERT(!index_mode || next_block == MAX_BLOCKS || ((next_block < MAX_BLOCKS) && blocks_pc[next_block] == INVALID_BLOCK));
    DEBUG_ASSERT(GetPtr(pc) == nullptr);

    u8** page_ptr = page_pointers[pc >> Memory::PAGE_BITS];
    if (page_ptr == nullptr) {
        // pc isn't within mapped code: register the page that contains it
        OnCodeLoad(pc & ~Memory::PAGE_MASK, Memory::PAGE_SIZE);
        page_ptr = page_pointers[pc >> Memory::PAGE_BITS];
    }

    u8** block_ptr = &page_ptr[pc & Memory::PAGE_MASK];
    DEBUG_ASSERT(*block_ptr == nullptr);

    if (index_mode) {
        // Every index is in use: start over. Clear() only nulls the slots, so block_ptr
        // stays valid, and it resets next_block to 0.
        if (next_block == MAX_BLOCKS) Clear();

        blocks_pc[next_block] = pc;
        *block_ptr = id_to_pointer(next_block);

        // Advance to the next free index. Only indices below num_blocks can be occupied, so
        // the scan stops there; this also keeps it from reading blocks_pc[MAX_BLOCKS] when
        // the table is completely full.
        do {
            ++next_block;
        } while (next_block < num_blocks && blocks_pc[next_block] != INVALID_BLOCK);

        // The new block was appended past the used prefix: grow the prefix by one.
        if (next_block > num_blocks) num_blocks++;
    }
    return *block_ptr;
}
/// Forwards a newly mapped code region to every registered cache.
void CacheManager::RegisterCode(u32 address, u32 size) const {
    std::for_each(caches.begin(), caches.end(),
                  [address, size](CacheBase* cache) { cache->OnCodeLoad(address, size); });
}
/// Tells every registered cache that a code range is being unmapped.
void CacheManager::UnregisterCode(u32 address, u32 size) const {
    for (auto it = caches.cbegin(); it != caches.cend(); ++it) {
        (*it)->OnCodeUnload(address, size);
    }
}
void CacheManager::ClearCache() const {
for (auto const& cache : caches) cache->Clear();
}
/// Appends a cache to the list of caches that receive notifications.
/// The manager stores the raw pointer only.
void CacheManager::RegisterCache(CacheBase* cache) {
    caches.insert(caches.end(), cache);
}
/// Removes every occurrence of the given cache from the notification list.
void CacheManager::UnregisterCache(CacheBase* cache) {
    caches.remove(cache);
}
/// Global manager instance; every CacheBase registers itself here on construction and
/// unregisters on destruction.
// NOTE(review): caches defined at namespace scope in other translation units would register
// during static initialization, and the order relative to this object is not fixed by the
// language - confirm this object is always constructed first.
CacheManager g_cachemanager;
}

137
src/core/arm/cache/cache.h vendored Normal file
View file

@ -0,0 +1,137 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <array>
#include <functional>
#include <list>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/memory.h"
namespace Cache {
/// Callback type run by CacheBase::Clear() before the cached entries are dropped
using OnClearCb = std::function<void()>;

/// Number of block indices available to an index based cache
constexpr u32 MAX_BLOCKS = 0x40000;

/// Marker stored in the block table for an index that is not in use
constexpr u32 INVALID_BLOCK = 0xFFFFFFFF;
/// Pointer table for one registered code region covering [addr, addr_end).
struct BlockPtrCache {
// First address of the region
u32 addr;
// One past the last address of the region
u32 addr_end;
// One slot per address of the region (slot index = address - addr); nullptr means no entry
std::vector<u8*> data;
};
/**
 * Shared implementation behind PtrCache and Cache: maps an address to a cached pointer
 * through a per-page lookup table. In index mode it additionally tracks which block index
 * belongs to which address. Instances register themselves with g_cachemanager.
 */
// NOTE(review): the object hands its own address to the cache manager, so copying an
// instance looks unsafe - confirm no caller copies caches.
class CacheBase {
protected:
    /// @param index_mode true to keep block index bookkeeping, false to only store pointers
    /// @param clearcb Callback run by Clear(); may be empty
    explicit CacheBase(bool index_mode, OnClearCb clearcb);
    ~CacheBase();

public:
    /// Called when the cache needs to reset or Clear() is called
    void SetClearCallback(OnClearCb cb) { OnClearCallback = cb; }

    /// Clear and call clear callback
    void Clear();

    /// Removes the entry cached for pc; returns true if block was found, false otherwise
    bool RemoveBlock(u32 pc);

    /// Removes every entry cached for an address in [start, end); returns true if any
    /// entry was removed
    bool RemoveRange(u32 start, u32 end);

    /// Registers a page-aligned code region so entries can be cached for it
    void OnCodeLoad(u32 address, u32 size);

    /// Unregisters every code region overlapping [address, address + size)
    void OnCodeUnload(u32 address, u32 size);

protected:
    /// Returns the pointer cached for pc, or nullptr if there is none
    u8* GetPtr(u32 pc) const {
        u8** ptr = page_pointers[pc >> Memory::PAGE_BITS];
        if (ptr != nullptr) {
            u8* const entry = ptr[pc & Memory::PAGE_MASK];
            // An empty slot is an ordinary cache miss; only a non-null entry can be
            // translated back into a block index for the consistency check.
            DEBUG_ASSERT(!index_mode || entry == nullptr || blocks_pc[pointer_to_id(entry)] == pc);
            return entry;
        }
        return nullptr;
    }

    /// Returns a reference to the slot for pc, which must not hold an entry yet
    u8*& GetNewPtr(u32 pc);

    /// Index mode only: converts a block index into the pointer stored in the slot
    std::function<u8*(u32)> id_to_pointer;
    /// Index mode only: converts a stored pointer back into its block index
    std::function<u32(u8*)> pointer_to_id;

private:
    bool index_mode;
    OnClearCb OnClearCallback = nullptr;

    /// Pointer tables of the registered code regions
    std::vector<BlockPtrCache> ptr_caches;
    /// Per page: pointer to the page's first slot inside a region table, or nullptr if the
    /// page is not registered
    std::array<u8**, (1 << (32 - Memory::PAGE_BITS))> page_pointers;

    /// Index mode only: address owning each block index, or INVALID_BLOCK if free
    std::vector<u32> blocks_pc;
    /// Index handed out by the next GetNewPtr() call
    u32 next_block = 0;
    /// Length of the index prefix that may contain used blocks
    u32 num_blocks = 0;
};
/// Use this if you only need to store a pointer
template <typename T>
class PtrCache final : public CacheBase {
public:
explicit PtrCache(OnClearCb clearcb = nullptr) : CacheBase(false, clearcb) {
static_assert(std::is_pointer<T>::value, "T must be a pointer");
}
~PtrCache() {}
/// Get cached pointer for PC
T FindPtr(u32 pc) { return reinterpret_cast<T>(GetPtr(pc)); }
/// Get reference of pointer for PC
T& GetNewPtr(u32 pc) { return reinterpret_cast<T&>(CacheBase::GetNewPtr(pc)); }
};
/// Index based cache
template <typename T>
class Cache final : public CacheBase {
public:
explicit Cache(OnClearCb clearcb = nullptr) : CacheBase(true, clearcb) {
id_to_pointer = [this](u32 id) -> u8* {
return reinterpret_cast<u8*>(&blocks[id]);
};
pointer_to_id = [this](u8* ptr) -> u32 {
return static_cast<u32>(reinterpret_cast<T*>(ptr) - &blocks[0]);
};
}
~Cache() {}
/// Get block cached for PC
T* FindBlock(u32 pc) { return reinterpret_cast<T*>(GetPtr(pc)); }
/// Allocate block for PC
T& GetNewBlock(u32 pc) { return *reinterpret_cast<T*&>(GetNewPtr(pc)); }
private:
std::array<T, MAX_BLOCKS> blocks;
};
/// Keeps track of every live cache and broadcasts code mapping changes to all of them.
class CacheManager {
public:
    CacheManager() {}
    ~CacheManager() {}

    /// Loaders call these when mapping/unmapping code
    void RegisterCode(u32 address, u32 size) const;
    void UnregisterCode(u32 address, u32 size = 1) const;

    /// Clear every cache
    void ClearCache() const;

    /// Adds a cache to / removes a cache from the set that receives notifications
    void RegisterCache(CacheBase* cache);
    void UnregisterCache(CacheBase* cache);

private:
    /// Registered caches; stored as raw pointers
    std::list<CacheBase*> caches;
};
extern CacheManager g_cachemanager;
}

View file

@ -14,6 +14,7 @@
#include "core/memory.h" #include "core/memory.h"
#include "core/hle/svc.h" #include "core/hle/svc.h"
#include "core/arm/cache/cache.h"
#include "core/arm/disassembler/arm_disasm.h" #include "core/arm/disassembler/arm_disasm.h"
#include "core/arm/dyncom/arm_dyncom_dec.h" #include "core/arm/dyncom/arm_dyncom_dec.h"
#include "core/arm/dyncom/arm_dyncom_interpreter.h" #include "core/arm/dyncom/arm_dyncom_interpreter.h"
@ -1144,6 +1145,8 @@ static inline void *AllocBuffer(unsigned int size) {
return (void *)&inst_buf[start]; return (void *)&inst_buf[start];
} }
Cache::PtrCache<u8*> instr_cache([]() {top = 0;});
static shtop_fp_t get_shtop(unsigned int inst) { static shtop_fp_t get_shtop(unsigned int inst) {
if (BIT(inst, 25)) { if (BIT(inst, 25)) {
return DPO(Immediate); return DPO(Immediate);
@ -3495,7 +3498,7 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
return inst_size; return inst_size;
} }
static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { static int InterpreterTranslateBlock(ARMul_State* cpu, u32 addr) {
Common::Profiling::ScopeTimer timer_decode(profile_decode); Common::Profiling::ScopeTimer timer_decode(profile_decode);
MICROPROFILE_SCOPE(DynCom_Decode); MICROPROFILE_SCOPE(DynCom_Decode);
@ -3506,7 +3509,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
ARM_INST_PTR inst_base = nullptr; ARM_INST_PTR inst_base = nullptr;
int ret = NON_BRANCH; int ret = NON_BRANCH;
int size = 0; // instruction size of basic block int size = 0; // instruction size of basic block
bb_start = top;
u32 phys_addr = addr; u32 phys_addr = addr;
u32 pc_start = cpu->Reg[15]; u32 pc_start = cpu->Reg[15];
@ -3524,17 +3526,14 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
ret = inst_base->br; ret = inst_base->br;
}; };
cpu->instruction_cache[pc_start] = bb_start;
return KEEP_GOING; return KEEP_GOING;
} }
static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { static int InterpreterTranslateSingle(ARMul_State* cpu, u32 addr) {
Common::Profiling::ScopeTimer timer_decode(profile_decode); Common::Profiling::ScopeTimer timer_decode(profile_decode);
MICROPROFILE_SCOPE(DynCom_Decode); MICROPROFILE_SCOPE(DynCom_Decode);
ARM_INST_PTR inst_base = nullptr; ARM_INST_PTR inst_base = nullptr;
bb_start = top;
u32 phys_addr = addr; u32 phys_addr = addr;
u32 pc_start = cpu->Reg[15]; u32 pc_start = cpu->Reg[15];
@ -3545,8 +3544,6 @@ static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr)
inst_base->br = SINGLE_STEP; inst_base->br = SINGLE_STEP;
} }
cpu->instruction_cache[pc_start] = bb_start;
return KEEP_GOING; return KEEP_GOING;
} }
@ -3589,7 +3586,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
#define SHIFTER_OPERAND inst_cream->shtop_func(cpu, inst_cream->shifter_operand) #define SHIFTER_OPERAND inst_cream->shtop_func(cpu, inst_cream->shifter_operand)
#define FETCH_INST if (inst_base->br != NON_BRANCH) goto DISPATCH; \ #define FETCH_INST if (inst_base->br != NON_BRANCH) goto DISPATCH; \
inst_base = (arm_inst *)&inst_buf[ptr] inst_base = reinterpret_cast<arm_inst*>(ptr)
#define INC_PC(l) ptr += sizeof(arm_inst) + l #define INC_PC(l) ptr += sizeof(arm_inst) + l
#define INC_PC_STUB ptr += sizeof(arm_inst) #define INC_PC_STUB ptr += sizeof(arm_inst)
@ -3879,7 +3876,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
unsigned int addr; unsigned int addr;
unsigned int num_instrs = 0; unsigned int num_instrs = 0;
int ptr; u8* ptr;
LOAD_NZCVT; LOAD_NZCVT;
DISPATCH: DISPATCH:
@ -3895,16 +3892,21 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
else else
cpu->Reg[15] &= 0xfffffffc; cpu->Reg[15] &= 0xfffffffc;
//clear cache if we dont have more than 10kb of buffer remaining
if ((top + (10 * 1024)) >= CACHE_BUFFER_SIZE) instr_cache.Clear();
// Find the cached instruction cream, otherwise translate it... // Find the cached instruction cream, otherwise translate it...
auto itr = cpu->instruction_cache.find(cpu->Reg[15]); ptr = instr_cache.FindPtr(cpu->Reg[15]);
if (itr != cpu->instruction_cache.end()) { if (ptr == nullptr) {
ptr = itr->second; ptr = instr_cache.GetNewPtr(cpu->Reg[15]) = reinterpret_cast<u8*>(&inst_buf[top]);
} else if (cpu->NumInstrsToExecute != 1) { if (cpu->NumInstrsToExecute != 1) {
if (InterpreterTranslateBlock(cpu, ptr, cpu->Reg[15]) == FETCH_EXCEPTION) if (InterpreterTranslateBlock(cpu, cpu->Reg[15]) == FETCH_EXCEPTION)
goto END; goto END;
} else { }
if (InterpreterTranslateSingle(cpu, ptr, cpu->Reg[15]) == FETCH_EXCEPTION) else {
goto END; if (InterpreterTranslateSingle(cpu, cpu->Reg[15]) == FETCH_EXCEPTION)
goto END;
}
} }
// Find breakpoint if one exists within the block // Find breakpoint if one exists within the block
@ -3912,7 +3914,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
breakpoint_data = GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute); breakpoint_data = GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute);
} }
inst_base = (arm_inst *)&inst_buf[ptr]; inst_base = reinterpret_cast<arm_inst*>(ptr);
GOTO_NEXT_INST; GOTO_NEXT_INST;
} }
ADC_INST: ADC_INST:

View file

@ -236,10 +236,6 @@ public:
unsigned bigendSig; unsigned bigendSig;
unsigned syscallSig; unsigned syscallSig;
// TODO(bunnei): Move this cache to a better place - it should be per codeset (likely per
// process for our purposes), not per ARMul_State (which tracks CPU core state).
std::unordered_map<u32, int> instruction_cache;
private: private:
void ResetMPCoreCP15Registers(); void ResetMPCoreCP15Registers();

View file

@ -8,6 +8,7 @@
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/arm/cache/cache.h"
#include "core/hle/kernel/memory.h" #include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h" #include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/resource_limit.h"
@ -120,6 +121,10 @@ void Process::Run(s32 main_thread_priority, u32 stack_size) {
MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code); MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code);
MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private); MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private);
// Map cache
Cache::g_cachemanager.UnregisterCode(0, 0xFFFFFFFF);
Cache::g_cachemanager.RegisterCode(codeset->code.addr, codeset->code.size);
// Allocate and map stack // Allocate and map stack
vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size, vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size,
std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked