diff --git a/CMakeLists.txt b/CMakeLists.txt index b8a981711..5959543bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,14 @@ option(CITRA_USE_BUNDLED_GLFW "Download bundled GLFW binaries" OFF) option(ENABLE_QT "Enable the Qt frontend" ON) option(CITRA_USE_BUNDLED_QT "Download bundled Qt binaries" OFF) option(CITRA_FORCE_QT4 "Use Qt4 even if Qt5 is available." OFF) +option(ENABLE_BINARY_TRANSLATION "Enable binary translation. Requires LLVM" OFF) +if(ENABLE_BINARY_TRANSLATION) + add_definitions(-DENABLE_BINARY_TRANSLATION) + find_package(LLVM REQUIRED CONFIG) + include_directories(${LLVM_INCLUDE_DIRS}) + add_definitions(${LLVM_DEFINITIONS}) + llvm_map_components_to_libnames(llvm_libs Core Support native ExecutionEngine MCJIT BitWriter ipo) +endif() if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git/hooks/pre-commit) message(STATUS "Copying pre-commit hook") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cb09f3cd1..06615a7a6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,3 +10,6 @@ endif() if (ENABLE_QT) add_subdirectory(citra_qt) endif() +if(ENABLE_BINARY_TRANSLATION) + add_subdirectory(binary_translation) +endif() diff --git a/src/binary_translation/ARMFuncs.cpp b/src/binary_translation/ARMFuncs.cpp new file mode 100644 index 000000000..12f65b92d --- /dev/null +++ b/src/binary_translation/ARMFuncs.cpp @@ -0,0 +1,160 @@ +#include "ARMFuncs.h" +#include "InstructionBlock.h" +#include + +ARMFuncs::ShiftTN ARMFuncs::DecodeImmShift(InstructionBlock* instruction, u32 type, u32 imm5) +{ + auto ir_builder = instruction->IrBuilder(); + switch (type) + { + case 0: return{ SRType::LSL, ir_builder->getInt32(imm5) }; + case 1: return{ SRType::LSR, ir_builder->getInt32(imm5 ? imm5 : 32) }; + case 2: return{ SRType::ASR, ir_builder->getInt32(imm5 ? 
imm5 : 32) }; + case 3: + if (imm5) + return{ SRType::ROR, ir_builder->getInt32(imm5) }; + else + return{ SRType::RRX, ir_builder->getInt32(1) }; + default: assert(false && "Invalid shift type"); return{ SRType::LSL, ir_builder->getInt32(0) }; /* assert takes one expression; the identity shift keeps every path returning a value in NDEBUG builds */ + } +} + +llvm::Value* ARMFuncs::Shift(InstructionBlock* instruction, llvm::Value* value, SRType type, llvm::Value* amount, llvm::Value* carry_in) +{ + return Shift_C(instruction, value, type, amount, carry_in).result; +} + +ARMFuncs::ResultCarry ARMFuncs::Shift_C(InstructionBlock* instruction, llvm::Value* value, SRType type, llvm::Value* amount, llvm::Value* carry_in) +{ + auto ir_builder = instruction->IrBuilder(); + + auto amount_zero = ir_builder->CreateICmpEQ(amount, ir_builder->getInt32(0)); + ResultCarry result_amount_not_zero = {}; + switch (type) + { + case SRType::LSL: result_amount_not_zero = LSL_C(instruction, value, amount); break; + case SRType::LSR: result_amount_not_zero = LSR_C(instruction, value, amount); break; + case SRType::ASR: result_amount_not_zero = ASR_C(instruction, value, amount); break; + case SRType::ROR: result_amount_not_zero = ROR_C(instruction, value, amount); break; + case SRType::RRX: result_amount_not_zero = RRX_C(instruction, value, carry_in); break; + default: assert(false && "Invalid shift type"); result_amount_not_zero = { value, carry_in }; break; /* never feed null operands to CreateSelect below */ + } + + auto result = ir_builder->CreateSelect(amount_zero, value, result_amount_not_zero.result); + auto carry = ir_builder->CreateSelect(amount_zero, carry_in, result_amount_not_zero.carry); + + return{ result, carry }; +} + +// Generates code for LSL, LSR that checks for 0 shift +llvm::Value* ShiftZeroCheck( + InstructionBlock *instruction, llvm::Value* x, llvm::Value* shift, + std::function non_zero_function) +{ + auto ir_builder = instruction->IrBuilder(); + + auto amount_zero = ir_builder->CreateICmpEQ(shift, ir_builder->getInt32(0)); + auto result_amount_not_zero = non_zero_function(instruction, x, shift); + + return ir_builder->CreateSelect(amount_zero, x, result_amount_not_zero.result); +} + +ARMFuncs::ResultCarry 
ARMFuncs::LSL_C(InstructionBlock* instruction, llvm::Value* x, llvm::Value* shift) +{ + auto ir_builder = instruction->IrBuilder(); + auto N = ir_builder->getInt32(32); + + auto result = ir_builder->CreateSelect(ir_builder->CreateICmpULT(shift, N), ir_builder->CreateShl(x, shift), ir_builder->getInt32(0)); /* shl by >= 32 is poison in LLVM, while ARM yields 0; this also makes ROR_C correct when shift % 32 == 0 */ + auto carry = ir_builder->CreateTrunc(ir_builder->CreateLShr(x, ir_builder->CreateSub(N, shift, "", true, true)), ir_builder->getInt1Ty()); + return{ result, carry }; +} + +llvm::Value* ARMFuncs::LSL(InstructionBlock* instruction, llvm::Value* x, llvm::Value* shift) +{ + return ShiftZeroCheck(instruction, x, shift, &ARMFuncs::LSL_C); +} + +ARMFuncs::ResultCarry ARMFuncs::LSR_C(InstructionBlock* instruction, llvm::Value* x, llvm::Value* shift) +{ + auto ir_builder = instruction->IrBuilder(); + auto one = ir_builder->getInt32(1); + + auto result = ir_builder->CreateSelect(ir_builder->CreateICmpULT(shift, ir_builder->getInt32(32)), ir_builder->CreateLShr(x, shift), ir_builder->getInt32(0)); /* DecodeImmShift encodes LSR #32 as amount 32: ARM result is 0, a plain lshr would be poison */ + auto carry = ir_builder->CreateTrunc(ir_builder->CreateLShr(x, ir_builder->CreateSub(shift, one, "", true, true)), ir_builder->getInt1Ty()); + return{ result, carry }; +} + +llvm::Value* ARMFuncs::LSR(InstructionBlock* instruction, llvm::Value* x, llvm::Value* shift) +{ + return ShiftZeroCheck(instruction, x, shift, &ARMFuncs::LSR_C); +} + +ARMFuncs::ResultCarry ARMFuncs::ASR_C(InstructionBlock* instruction, llvm::Value* x, llvm::Value* shift) +{ + auto ir_builder = instruction->IrBuilder(); + auto one = ir_builder->getInt32(1); + + auto result = ir_builder->CreateAShr(x, ir_builder->CreateSelect(ir_builder->CreateICmpULT(shift, ir_builder->getInt32(32)), shift, ir_builder->getInt32(31))); /* ASR #32 sign-fills on ARM; clamping the amount to 31 gives the same value without LLVM poison */ + auto carry = ir_builder->CreateTrunc(ir_builder->CreateLShr(x, ir_builder->CreateSub(shift, one, "", true, true)), ir_builder->getInt1Ty()); + return{ result, carry }; +} + +ARMFuncs::ResultCarry ARMFuncs::ROR_C(InstructionBlock* instruction, llvm::Value* x, llvm::Value* shift) +{ + auto ir_builder = instruction->IrBuilder(); + auto N = ir_builder->getInt32(32); + auto m = ir_builder->CreateURem(shift, N); + + auto result = ir_builder->CreateOr(LSR(instruction, x, m), LSL(instruction, x, ir_builder->CreateSub(N, m))); + auto carry = ir_builder->CreateTrunc(ir_builder->CreateLShr(result, 
ir_builder->getInt32(31)), ir_builder->getInt1Ty()); + return{ result, carry }; +} + +ARMFuncs::ResultCarry ARMFuncs::RRX_C(InstructionBlock* instruction, llvm::Value* x, llvm::Value* carry_in) +{ + auto ir_builder = instruction->IrBuilder(); + + auto result = ir_builder->CreateLShr(x, 1); + result = ir_builder->CreateOr(result, ir_builder->CreateShl(ir_builder->CreateZExt(carry_in, ir_builder->getInt32Ty()), 31)); + auto carry = ir_builder->CreateTrunc(x, ir_builder->getInt1Ty()); + return{ result, carry }; +} + +llvm::Value* ARMFuncs::ARMExpandImm(InstructionBlock* instruction, u32 imm12) +{ + auto ir_builder = instruction->IrBuilder(); + // Manual says carry in does not affect the result, so use undef + return ARMExpandImm_C(instruction, imm12, llvm::UndefValue::get(ir_builder->getInt1Ty())).result; +} + +ARMFuncs::ResultCarry ARMFuncs::ARMExpandImm_C(InstructionBlock *instruction, u32 imm12, llvm::Value* carry) +{ + auto ir_builder = instruction->IrBuilder(); + + auto value = ir_builder->getInt32(imm12 & 0xFF); + auto shift = ir_builder->getInt32(2 * (imm12 >> 8)); + return Shift_C(instruction, value, SRType::ROR, shift, carry); +} + +// AddWithCarry from armsupp.cpp +ARMFuncs::ResultCarryOverflow ARMFuncs::AddWithCarry(InstructionBlock* instruction, llvm::Value* x, llvm::Value* y, llvm::Value* carry_in) +{ + auto ir_builder = instruction->IrBuilder(); + + auto xu64 = ir_builder->CreateZExt(x, ir_builder->getInt64Ty()); + auto xs64 = ir_builder->CreateSExt(x, ir_builder->getInt64Ty()); + auto yu64 = ir_builder->CreateZExt(y, ir_builder->getInt64Ty()); + auto ys64 = ir_builder->CreateSExt(y, ir_builder->getInt64Ty()); + auto c64 = ir_builder->CreateZExt(carry_in, ir_builder->getInt64Ty()); + + auto unsignedSum = ir_builder->CreateAdd(ir_builder->CreateAdd(xu64, yu64), c64); + auto singedSum = ir_builder->CreateAdd(ir_builder->CreateAdd(xs64, ys64), c64); + auto result32 = ir_builder->CreateTrunc(unsignedSum, ir_builder->getInt32Ty()); + auto resultU64 = 
ir_builder->CreateZExt(result32, ir_builder->getInt64Ty()); + auto resultS64 = ir_builder->CreateSExt(result32, ir_builder->getInt64Ty()); + + auto carry = ir_builder->CreateICmpNE(resultU64, unsignedSum); + auto overflow = ir_builder->CreateICmpNE(resultS64, singedSum); + + return{ result32, carry, overflow }; +} \ No newline at end of file diff --git a/src/binary_translation/ARMFuncs.h b/src/binary_translation/ARMFuncs.h new file mode 100644 index 000000000..0097663fa --- /dev/null +++ b/src/binary_translation/ARMFuncs.h @@ -0,0 +1,51 @@ +#include + +/* + * Functions from the manual, + * A8.4.3 Pseudocode details of instruction-specified shifts and rotates + * A2.2.1 Integer arithmetic + * A5.2.4 Modified immediate constants in ARM instructions + */ + +class InstructionBlock; + +namespace llvm +{ + class Value; +} + +class ARMFuncs +{ +public: + enum class SRType { LSL, LSR, ASR, RRX, ROR }; + struct ShiftTN + { + SRType type; + llvm::Value *amount; + }; + struct ResultCarry + { + llvm::Value *result, *carry; + }; + struct ResultCarryOverflow + { + llvm::Value *result, *carry, *overflow; + }; + + static ShiftTN DecodeImmShift(InstructionBlock *instruction, u32 type, u32 imm5); + + static llvm::Value *Shift(InstructionBlock *instruction, llvm::Value *value, SRType type, llvm::Value *amount, llvm::Value *carry_in); + static ResultCarry Shift_C(InstructionBlock *instruction, llvm::Value *value, SRType type, llvm::Value *amount, llvm::Value *carry_in); + static ResultCarry LSL_C(InstructionBlock *instruction, llvm::Value *x, llvm::Value *shift); + static llvm::Value *LSL(InstructionBlock *instruction, llvm::Value *x, llvm::Value *shift); + static ResultCarry LSR_C(InstructionBlock *instruction, llvm::Value *x, llvm::Value *shift); + static llvm::Value *LSR(InstructionBlock *instruction, llvm::Value *x, llvm::Value *shift); + static ResultCarry ASR_C(InstructionBlock *instruction, llvm::Value *x, llvm::Value *shift); + static ResultCarry ROR_C(InstructionBlock 
*instruction, llvm::Value *x, llvm::Value *shift); + static ResultCarry RRX_C(InstructionBlock *instruction, llvm::Value *x, llvm::Value *carry_in); + + static llvm::Value *ARMExpandImm(InstructionBlock *instruction, u32 imm12); + static ResultCarry ARMExpandImm_C(InstructionBlock *instruction, u32 imm12, llvm::Value *carry); + + static ResultCarryOverflow AddWithCarry(InstructionBlock *instruction, llvm::Value *x, llvm::Value *y, llvm::Value *carry_in); +}; \ No newline at end of file diff --git a/src/binary_translation/BinarySearch.h b/src/binary_translation/BinarySearch.h new file mode 100644 index 000000000..e93332afd --- /dev/null +++ b/src/binary_translation/BinarySearch.h @@ -0,0 +1,22 @@ +#pragma once +#include "common/logging/log.h" +#include + +// Used for debugging + +struct BinarySearch +{ + size_t min; + size_t mid; + size_t max; + BinarySearch(size_t max) : min(0), mid(max / 2), max(max) { } + BinarySearch(size_t min, size_t max) : min(min), mid((min + max) / 2), max(max) { } + BinarySearch l() const { return BinarySearch(min, mid); } + BinarySearch r() const { return BinarySearch(mid, max); } + operator size_t() + { + LOG_DEBUG(BinaryTranslator, "BinarySearch: %zx: %zx - %zx (%zx, %zu)", mid, max, min, max - min, max > min ? (size_t)std::log2(max - min) : (size_t)0); /* size_t needs the z length modifier; log2(0) is -inf and must not be converted to an integer */ + return mid; + } + operator int() { return static_cast(*this); } +}; \ No newline at end of file diff --git a/src/binary_translation/BlockColors.cpp b/src/binary_translation/BlockColors.cpp new file mode 100644 index 000000000..633c487ca --- /dev/null +++ b/src/binary_translation/BlockColors.cpp @@ -0,0 +1,105 @@ +#include "BlockColors.h" +#include +#include "InstructionBlock.h" +#include +#include "common/logging/log.h" + +using namespace llvm; + +BlockColors::BlockColors(ModuleGen* module) : module(module) +{ + auto ir_builder = module->IrBuilder(); + function_type = FunctionType::get(ir_builder->getVoidTy(), ir_builder->getInt32Ty(), false); +} + +BlockColors::~BlockColors() +{ +} + +void 
BlockColors::AddBlock(InstructionBlock* block) +{ + if (block->HasColor()) return; + + std::stack current_color_stack; + current_color_stack.push(block); + auto color = colors.size(); + colors.push_back({ color }); + + while (current_color_stack.size()) + { + auto item = current_color_stack.top(); + current_color_stack.pop(); + if (item->HasColor()) continue; /* a block reachable from several neighbours may be pushed more than once; record it only once */ + item->SetColor(color); + colors[color].instructions.push_back(item); + for (auto next : item->GetNexts()) + { + if (next->HasColor()) assert(next->GetColor() == color); + else current_color_stack.push(next); + } + for (auto prev : item->GetPrevs()) + { + if (prev->HasColor()) assert(prev->GetColor() == color); + else current_color_stack.push(prev); + } + } +} + +void BlockColors::GenerateFunctions() +{ + auto ir_builder = module->IrBuilder(); + + LOG_INFO(BinaryTranslator, "%zx block colors", colors.size()); /* size() is size_t, so use the z length modifier */ + + for (auto &color : colors) + { + auto function = Function::Create(function_type, GlobalValue::PrivateLinkage, + "ColorFunction", module->Module()); + color.function = function; + auto index = &function->getArgumentList().front(); + + auto entry_basic_block = BasicBlock::Create(getGlobalContext(), "Entry", function); + auto default_case_basic_block = BasicBlock::Create(getGlobalContext(), "Default", function); + + ir_builder->SetInsertPoint(default_case_basic_block); + ir_builder->CreateUnreachable(); + + ir_builder->SetInsertPoint(entry_basic_block); + auto switch_instruction = ir_builder->CreateSwitch(index, default_case_basic_block, color.instructions.size()); + for (size_t i = 0; i < color.instructions.size(); ++i) + { + switch_instruction->addCase(ir_builder->getInt32(i), color.instructions[i]->GetEntryBasicBlock()); + AddBasicBlocksToFunction(function, color.instructions[i]->GetEntryBasicBlock()); + } + } +} + +void BlockColors::AddBasicBlocksToFunction(Function* function, BasicBlock* basic_block) +{ + if (basic_block->getParent()) + { + assert(basic_block->getParent() == function); + return; + } + + std::stack basic_blocks; + 
basic_blocks.push(basic_block); + while (basic_blocks.size()) + { + auto top = basic_blocks.top(); + basic_blocks.pop(); + if (top->getParent()) { assert(top->getParent() == function); continue; } /* a block with several predecessors can be pushed twice; insert it only once */ + top->insertInto(function); + auto terminator = top->getTerminator(); + for (unsigned i = 0; i < terminator->getNumSuccessors(); ++i) + { + auto next = terminator->getSuccessor(i); + if (next->getParent()) + { + assert(next->getParent() == function); + continue; + } + basic_blocks.push(next); + } + } +} \ No newline at end of file diff --git a/src/binary_translation/BlockColors.h b/src/binary_translation/BlockColors.h new file mode 100644 index 000000000..206053ee1 --- /dev/null +++ b/src/binary_translation/BlockColors.h @@ -0,0 +1,51 @@ +#pragma once +#include + +namespace llvm +{ + class BasicBlock; + class Function; + class FunctionType; +} +class InstructionBlock; +class ModuleGen; + +/* + +Responsible to partition the blocks by connectivity, each disjoined graph gets a color +And to generate a function for each color + +*/ + +class BlockColors +{ +public: + BlockColors(ModuleGen *module); + ~BlockColors(); + + void AddBlock(InstructionBlock *block); + // Generates a function for each color + void GenerateFunctions(); + + llvm::FunctionType *GetFunctionType() { return function_type; } + size_t GetColorCount() const { return colors.size(); } + size_t GetColorInstructionCount(size_t color) const { return colors[color].instructions.size(); } + InstructionBlock *GetColorInstruction(size_t color, size_t index) { return colors[color].instructions[index]; } + llvm::Function *GetColorFunction(size_t color) { return colors[color].function; } +private: + ModuleGen *module; + + // void ColorFunction(int i) + // Runs the code for color->instructions[i] + llvm::FunctionType *function_type; + + void AddBasicBlocksToFunction(llvm::Function *function, llvm::BasicBlock *basic_block); + + struct Color + { + size_t color; + std::vector instructions; + llvm::Function *function; + }; + std::vector colors; +}; \ No newline at end of file diff --git 
a/src/binary_translation/CMakeLists.txt b/src/binary_translation/CMakeLists.txt new file mode 100644 index 000000000..c6f82e8ff --- /dev/null +++ b/src/binary_translation/CMakeLists.txt @@ -0,0 +1,45 @@ +set(SRCS + main.cpp + CodeGen.cpp + ModuleGen.cpp + Disassembler.cpp + InstructionBlock.cpp + MachineState.cpp + TBAA.cpp + ARMFuncs.cpp + BlockColors.cpp + + Instructions/Instruction.cpp + Instructions/MovShift.cpp + Instructions/Branch.cpp + Instructions/Arithmetic.cpp + Instructions/Ldr.cpp + Instructions/Str.cpp + ) +set(HEADERS + CodeGen.h + ModuleGen.h + Disassembler.h + InstructionBlock.h + MachineState.h + TBAA.h + BinarySearch.h + ARMFuncs.h + BlockColors.h + + Instructions/Types.h + Instructions/Instruction.h + Instructions/MovShift.h + Instructions/Branch.h + Instructions/Arithmetic.h + Instructions/Ldr.h + Instructions/Str.h + ) +create_directory_groups(${SRCS} ${HEADERS}) + +include_directories(.) +add_executable(binary_translate ${SRCS} ${HEADERS}) +target_link_libraries(binary_translate ${llvm_libs}) +target_link_libraries(binary_translate core common video_core) +target_link_libraries(binary_translate inih) +target_link_libraries(binary_translate ${PLATFORM_LIBRARIES}) diff --git a/src/binary_translation/CodeGen.cpp b/src/binary_translation/CodeGen.cpp new file mode 100644 index 000000000..a627624bd --- /dev/null +++ b/src/binary_translation/CodeGen.cpp @@ -0,0 +1,170 @@ +#include "CodeGen.h" +#include "ModuleGen.h" + +#include "core/loader/loader.h" +#include "common/logging/log.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +CodeGen::CodeGen(const char* output_object_filename, const char* output_debug_filename, bool verify) + : output_object_filename(output_object_filename), + output_debug_filename(output_debug_filename), + verify(verify) +{ +} + +CodeGen::~CodeGen() +{ +} + +void CodeGen::Run() +{ + if 
(!Loader::ROMCodeStart) + { + LOG_CRITICAL(BinaryTranslator, "No information from the loader about ROM file."); + return; + } + + InitializeLLVM(); + GenerateModule(); + GenerateDebugFiles(); + if (!Verify()) return; + OptimizeAndGenerate(); +} + +void CodeGen::InitializeLLVM() +{ + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + + auto triple_string = sys::getProcessTriple(); +#ifdef _WIN32 + // LLVM doesn't know how to load coff files + // It can handle elf files in every platform + triple_string += "-elf"; +#endif + + triple = llvm::make_unique(triple_string); + + // This engine builder is needed to get the target machine. It requires a module + // but takes ownership of it so the main module cannot be passed here. + EngineBuilder engine_builder(make_unique("", getGlobalContext())); + target_machine.reset(engine_builder.selectTarget(*triple, "", "", SmallVector())); + + module = make_unique("Module", getGlobalContext()); + module->setTargetTriple(triple_string); +} + +void CodeGen::GenerateModule() +{ + moduleGenerator = std::make_unique(module.get(), verify); + moduleGenerator->Run(); +} + +void CodeGen::GenerateDebugFiles() +{ + if (!output_debug_filename) return; + + LOG_INFO(BinaryTranslator, "Writing debug file"); + std::ofstream file(output_debug_filename); + if (!file) + { + LOG_ERROR(BinaryTranslator, "Cannot create debug file: %s", output_debug_filename); + return; + } + raw_os_ostream stream(file); + + module->print(stream, nullptr); + stream.flush(); + file.close(); + LOG_INFO(BinaryTranslator, "Done"); +} + +bool CodeGen::Verify() +{ + LOG_INFO(BinaryTranslator, "Verifying"); + raw_os_ostream os(std::cout); + if (verifyModule(*module, &os)) + { + LOG_CRITICAL(BinaryTranslator, "Verify failed"); + return false; + } + LOG_INFO(BinaryTranslator, "Done"); + return true; +} + +void CodeGen::OptimizeAndGenerate() +{ + /* + * Taken from opt for O3 + */ + PassManagerBuilder pass_manager_builder; + + legacy::FunctionPassManager 
function_pass_manager(module.get()); + legacy::PassManager pass_manager; + + module->setDataLayout(*target_machine->getDataLayout()); + + pass_manager.add(createVerifierPass()); + pass_manager.add(new TargetLibraryInfoWrapperPass(*triple.get())); + + pass_manager_builder.OptLevel = 3; + pass_manager_builder.SizeLevel = 0; + pass_manager_builder.Inliner = createFunctionInliningPass(3, 0); + pass_manager_builder.LoopVectorize = true; + pass_manager_builder.SLPVectorize = true; + + pass_manager_builder.populateFunctionPassManager(function_pass_manager); + + pass_manager_builder.OptLevel = 1; + pass_manager_builder.SizeLevel = 0; + pass_manager_builder.LoopVectorize = false; + pass_manager_builder.SLPVectorize = false; + pass_manager_builder.populateModulePassManager(pass_manager); + + LOG_INFO(BinaryTranslator, "Optimizing functions"); + function_pass_manager.doInitialization(); + for (auto &function : *module) + function_pass_manager.run(function); + function_pass_manager.doFinalization(); + LOG_INFO(BinaryTranslator, "Done"); + + pass_manager.add(createVerifierPass()); + + MCContext *context = nullptr; + std::ofstream file(output_object_filename, std::ios::binary); + if (!file) + { + LOG_CRITICAL(BinaryTranslator, "Cannot create object file: %s", output_object_filename); + return; + } + raw_os_ostream fstream(file); + std::unique_ptr<buffer_ostream> stream(new buffer_ostream(fstream)); /* owned, so the early return below no longer leaks it */ + + if (target_machine->addPassesToEmitMC(pass_manager, context, *stream, false)) + { + LOG_CRITICAL(BinaryTranslator, "Target does not support MC emission!"); + return; + } + LOG_INFO(BinaryTranslator, "Generating code"); + pass_manager.run(*module); + stream->flush(); + stream.reset(); /* destroy the buffer_ostream now so its contents reach fstream before the file is closed */ + fstream.flush(); + file.close(); + LOG_INFO(BinaryTranslator, "Done"); +} \ No newline at end of file diff --git a/src/binary_translation/CodeGen.h b/src/binary_translation/CodeGen.h new file mode 100644 index 000000000..c462f99af --- /dev/null +++ b/src/binary_translation/CodeGen.h @@ -0,0 +1,38 @@ +#pragma once +#include +#include + 
+namespace llvm +{ + class TargetMachine; + class Module; +} + +class ModuleGen; + +/* + * Holds alls the basic llvm structures + */ +class CodeGen +{ +public: + CodeGen(const char *output_object_filename, const char *output_debug_filename, bool verify); + ~CodeGen(); + + void Run(); + void InitializeLLVM(); + void GenerateModule(); + void GenerateDebugFiles(); + bool Verify(); + void OptimizeAndGenerate(); +private: + const char *output_object_filename; + const char *output_debug_filename; + bool verify; + + std::unique_ptr moduleGenerator; + + std::unique_ptr triple; + std::unique_ptr target_machine; + std::unique_ptr module; +}; \ No newline at end of file diff --git a/src/binary_translation/Disassembler.cpp b/src/binary_translation/Disassembler.cpp new file mode 100644 index 000000000..3a2e12182 --- /dev/null +++ b/src/binary_translation/Disassembler.cpp @@ -0,0 +1,23 @@ +#include "Disassembler.h" +#include "Instructions/Instruction.h" +#include + +std::vector g_read_functions; + +RegisterInstructionBase::RegisterInstructionBase(CreateFunctionType create_function) +{ + g_read_functions.push_back(create_function); +} + +std::unique_ptr Disassembler::Disassemble(u32 instruction, u32 address) +{ + for (auto read_function : g_read_functions) + { + auto result = read_function(instruction, address); + if (result != nullptr) + { + return std::unique_ptr(result); + } + } + return nullptr; +} \ No newline at end of file diff --git a/src/binary_translation/Disassembler.h b/src/binary_translation/Disassembler.h new file mode 100644 index 000000000..9987634e9 --- /dev/null +++ b/src/binary_translation/Disassembler.h @@ -0,0 +1,43 @@ +#include "common/common_types.h" +#include + +class Instruction; + +class Disassembler +{ +public: + /* + * Returns the instruction at address or null if unknown or not translatable + * address is used for PC relative operations + */ + static std::unique_ptr Disassemble(u32 instruction, u32 address); +}; + +class RegisterInstructionBase +{ 
+public: + typedef Instruction *(*CreateFunctionType)(u32 instruction, u32 address); + + RegisterInstructionBase(CreateFunctionType create_function); +}; + +/* + * Instantiate this class in a source file to register instruction in the disassembler + */ +template +class RegisterInstruction : RegisterInstructionBase +{ +public: + RegisterInstruction() : RegisterInstructionBase(&RegisterInstruction::Create) {} +private: + static Instruction *Create(u32 instruction, u32 address) + { + auto result = new DerivedInstruction(); + if (!result->Read(instruction, address)) + { + delete result; + return nullptr; + } + return result; + } +}; \ No newline at end of file diff --git a/src/binary_translation/InstructionBlock.cpp b/src/binary_translation/InstructionBlock.cpp new file mode 100644 index 000000000..47f68064f --- /dev/null +++ b/src/binary_translation/InstructionBlock.cpp @@ -0,0 +1,60 @@ +#include "InstructionBlock.h" +#include "ModuleGen.h" +#include "Instructions/Instruction.h" +#include +#include +#include "MachineState.h" + +InstructionBlock::InstructionBlock(ModuleGen* module, Instruction* instruction) + : module(module), + instruction(std::unique_ptr(instruction)) +{ + std::stringstream ss; + ss << std::hex << std::setfill('0') << std::setw(8) << instruction->Address() << "_"; + address_string = ss.str(); +} + +InstructionBlock::~InstructionBlock() +{ +} + +void InstructionBlock::GenerateEntryBlock() +{ + entry_basic_block = CreateBasicBlock("Entry"); +} + +void InstructionBlock::GenerateCode() +{ + auto ir_builder = Module()->IrBuilder(); + ir_builder->SetInsertPoint(entry_basic_block); + + module->GenerateIncInstructionCount(); + + instruction->GenerateCode(this); +} + +llvm::Value *InstructionBlock::Read(Register reg) +{ + return module->Machine()->ReadRegiser(reg,true); +} + +llvm::Value *InstructionBlock::Write(Register reg, llvm::Value *value) +{ + return module->Machine()->WriteRegiser(reg, value); +} + +llvm::BasicBlock 
*InstructionBlock::CreateBasicBlock(const char *name) +{ + return llvm::BasicBlock::Create(llvm::getGlobalContext(), address_string + name); +} + +void InstructionBlock::Link(InstructionBlock* prev, InstructionBlock* next) +{ + prev->nexts.push_back(next); + next->prevs.push_back(prev); +} + +u32 InstructionBlock::Address() const +{ + return instruction->Address(); +} \ No newline at end of file diff --git a/src/binary_translation/InstructionBlock.h b/src/binary_translation/InstructionBlock.h new file mode 100644 index 000000000..8f87bb73a --- /dev/null +++ b/src/binary_translation/InstructionBlock.h @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include "ModuleGen.h" + +namespace llvm +{ + class Value; + class BasicBlock; +} + +class Instruction; + +enum class Register; + +/* + * An instruction blocks + * Holds the entry and exit points for an instruction + * Responsible to generate the code + */ +class InstructionBlock +{ +public: + InstructionBlock(ModuleGen *module, Instruction *instruction); + ~InstructionBlock(); + + /* + * Generates the basic block of the instruction + */ + void GenerateEntryBlock(); + + /* + * Generates the code for the instruction + */ + void GenerateCode(); + + /* + * Generates code to read the register + */ + llvm::Value *Read(Register reg); + /* + * Generates code to write the value + * Returns the write instruction = written value + */ + llvm::Value *Write(Register reg, llvm::Value *value); + + /* + * Creates a basic block for use by instructions + */ + llvm::BasicBlock *CreateBasicBlock(const char *name); + /* + * Links two instructions, adding to prev and next lists + */ + static void Link(InstructionBlock *prev, InstructionBlock *next); + + u32 Address() const; + ModuleGen *Module() { return module; } + llvm::IRBuilder<> *IrBuilder() { return module->IrBuilder(); } + + llvm::BasicBlock *GetEntryBasicBlock() { return entry_basic_block; } + + bool HasColor() const { return has_color; } + void SetColor(size_t color) { 
this->color = color; has_color = true; } + size_t GetColor() const { return color; } + + std::list GetNexts() const { return nexts; } + std::list GetPrevs() const { return prevs; } +private: + // Textual representation of the address + // Used to generate names + std::string address_string; + + ModuleGen *module; + std::unique_ptr instruction; + + // The block at the entry to instruction + llvm::BasicBlock *entry_basic_block; + + bool has_color = false; + size_t color; + + std::list nexts; + std::list prevs; +}; \ No newline at end of file diff --git a/src/binary_translation/Instructions/Arithmetic.cpp b/src/binary_translation/Instructions/Arithmetic.cpp new file mode 100644 index 000000000..1abe31836 --- /dev/null +++ b/src/binary_translation/Instructions/Arithmetic.cpp @@ -0,0 +1,124 @@ +#include "Arithmetic.h" +#include "Disassembler.h" +#include +#include + +static RegisterInstruction register_instruction; + +bool IsSupported(Arithmetic::Op op) +{ + switch (op) + { + case Arithmetic::Op::BitwiseAnd: return true; + case Arithmetic::Op::BitwiseXor: return true; + case Arithmetic::Op::Subtract: return true; + case Arithmetic::Op::RevSubtract: return true; + case Arithmetic::Op::Add: return true; + case Arithmetic::Op::AddWithCarry: return true; + case Arithmetic::Op::SubtractWithCarry: return true; + case Arithmetic::Op::ReverseSubtractWithCarry: return true; + case Arithmetic::Op::BitwiseOr: return true; + case Arithmetic::Op::MoveAndShifts: return false; // Handled in MovShift.cpp + case Arithmetic::Op::BitwiseBitClear: return true; + case Arithmetic::Op::BitwiseNot: return false; // MVN + default: return false; + } +} + +bool IsBitwise(Arithmetic::Op op) +{ + return op == Arithmetic::Op::BitwiseAnd || + op == Arithmetic::Op::BitwiseXor || + op == Arithmetic::Op::BitwiseOr || + op == Arithmetic::Op::BitwiseBitClear; +} + +bool Arithmetic::Decode() +{ + if (ReadFields({ CondDef(), FieldDef<3>(0), FieldDef<4>(&op), FieldDef<1>(&set_flags), FieldDef<4>(&rn), + 
FieldDef<4>(&rd), FieldDef<5>(&imm5), FieldDef<2>(&type), FieldDef<1>(0), FieldDef<4>(&rm)})) + { + form = Form::Register; + if (rd == Register::PC && set_flags) return false; // SEE SUBS PC, LR and related instructions; + if (rn == Register::PC) return false; + if (rm == Register::PC) return false; + return IsSupported(op); + } + if (ReadFields({ CondDef(), FieldDef<3>(1), FieldDef<4>(&op), FieldDef<1>(&set_flags), FieldDef<4>(&rn), + FieldDef<4>(&rd), FieldDef<12>(&imm12) })) + { + form = Form::Immediate; + if (rd == Register::PC && set_flags) return false; // SEE SUBS PC, LR and related instructions; + if (rn == Register::PC) return false; + return IsSupported(op); + } + return false; +} + +void Arithmetic::GenerateInstructionCode(InstructionBlock* instruction_block) +{ + auto ir_builder = instruction_block->Module()->IrBuilder(); + + llvm::Value *left, *right, *carry_in; + ARMFuncs::ResultCarryOverflow result = { nullptr, nullptr, nullptr }; + auto bitwise = IsBitwise(op); + + carry_in = instruction_block->Read(Register::C); + + left = instruction_block->Read(rn); + + if (form == Form::Register) + { + if (bitwise) + { + auto shift_tn = ARMFuncs::DecodeImmShift(instruction_block, type, imm5); + auto shifted_carry = ARMFuncs::Shift_C(instruction_block, instruction_block->Read(rm), shift_tn.type, shift_tn.amount, carry_in); + right = shifted_carry.result; + result.carry = shifted_carry.carry; + } + else + { + auto shift_tn = ARMFuncs::DecodeImmShift(instruction_block, type, imm5); + right = ARMFuncs::Shift(instruction_block, instruction_block->Read(rm), shift_tn.type, shift_tn.amount, carry_in); + } + } + else + { + if (bitwise) + { + auto imm32_carry = ARMFuncs::ARMExpandImm_C(instruction_block, imm12, carry_in); + right = imm32_carry.result; + result.carry = imm32_carry.carry; + } + else + { + right = ARMFuncs::ARMExpandImm(instruction_block, imm12); + } + } + + switch (op) + { + case Op::BitwiseAnd: result.result = ir_builder->CreateAnd(left, right); break; + 
case Op::BitwiseXor: result.result = ir_builder->CreateXor(left, right); break; + case Op::Subtract: result = ARMFuncs::AddWithCarry(instruction_block, left, ir_builder->CreateNot(right), ir_builder->getInt32(1)); break; + case Op::RevSubtract: result = ARMFuncs::AddWithCarry(instruction_block, ir_builder->CreateNot(left), right, ir_builder->getInt32(1)); break; + case Op::Add: result = ARMFuncs::AddWithCarry(instruction_block, left, right, ir_builder->getInt32(0)); break; + case Op::AddWithCarry: result = ARMFuncs::AddWithCarry(instruction_block, left, right, carry_in); break; + case Op::SubtractWithCarry: result = ARMFuncs::AddWithCarry(instruction_block, left, ir_builder->CreateNot(right), carry_in); break; + case Op::ReverseSubtractWithCarry: result = ARMFuncs::AddWithCarry(instruction_block, ir_builder->CreateNot(left), right, carry_in); break; + case Op::BitwiseOr: result.result = ir_builder->CreateOr(left, right); break; + case Op::BitwiseBitClear: result.result = ir_builder->CreateAnd(left, ir_builder->CreateNot(right)); break; + default: break; + } + + instruction_block->Write(rd, result.result); + + if (set_flags) + { + instruction_block->Write(Register::N, ir_builder->CreateICmpSLT(result.result, ir_builder->getInt32(0))); + instruction_block->Write(Register::Z, ir_builder->CreateICmpEQ(result.result, ir_builder->getInt32(0))); + instruction_block->Write(Register::C, result.carry); + if (result.overflow) + instruction_block->Write(Register::V, result.overflow); + } +} \ No newline at end of file diff --git a/src/binary_translation/Instructions/Arithmetic.h b/src/binary_translation/Instructions/Arithmetic.h new file mode 100644 index 000000000..99686d589 --- /dev/null +++ b/src/binary_translation/Instructions/Arithmetic.h @@ -0,0 +1,35 @@ +#include "Instruction.h" +#include "Types.h" + +/* +* Data processing instructions +* ARMv7-A 5.2.1 (register), 5.2.2 (register-shifted register), 5.2.3 (immediate) +*/ + +class Arithmetic : public Instruction +{ 
+public: + enum class Form + { + Register, RegisterShiftedRegister, Immediate + }; + enum class Op + { + BitwiseAnd = 0, BitwiseXor, Subtract, RevSubtract, Add, AddWithCarry, SubtractWithCarry, ReverseSubtractWithCarry, + // Compare, Test, Misc + BitwiseOr = 12, MoveAndShifts, BitwiseBitClear, BitwiseNot + }; + + bool Decode() override; + void GenerateInstructionCode(InstructionBlock* instruction_block) override; +private: + Form form; + Op op; + bool set_flags; + Register rn; + Register rd; + u32 imm5; + u32 imm12; + u32 type; + Register rm; +}; \ No newline at end of file diff --git a/src/binary_translation/Instructions/Branch.cpp b/src/binary_translation/Instructions/Branch.cpp new file mode 100644 index 000000000..1680b822a --- /dev/null +++ b/src/binary_translation/Instructions/Branch.cpp @@ -0,0 +1,48 @@ +#include "Branch.h" +#include "Disassembler.h" +#include "InstructionBlock.h" +#include "ModuleGen.h" + +static RegisterInstruction register_instruction; + +bool Branch::Decode() +{ + // B imm, BL imm + if (ReadFields({ CondDef(), FieldDef<3>(5), FieldDef<1>(&link), FieldDef<24>(&imm24) })) + { + form = Form::Immediate; + return true; + } + // BLX reg + if (ReadFields({ CondDef(), FieldDef<24>(0x12fff3), FieldDef<4>(&rm) })) + { + if (rm == Register::PC) return false; + + link = true; + form = Form::Register; + return true; + } + return false; +} + +void Branch::GenerateInstructionCode(InstructionBlock* instruction_block) +{ + auto ir_builder = instruction_block->Module()->IrBuilder(); + if (link) + { + instruction_block->Write(Register::LR, ir_builder->getInt32(instruction_block->Address() + 4)); + } + if (form == Form::Immediate) + { + auto pc = static_cast(imm24 << 2); + pc = pc << 6 >> 6; // Sign extend + pc += instruction_block->Address() + 8; + instruction_block->Module()->BranchWritePCConst(instruction_block, pc); + } + else + { + auto pc = instruction_block->Read(rm); + instruction_block->Write(Register::PC, pc); + 
instruction_block->Module()->BranchReadPC(); + } +} \ No newline at end of file diff --git a/src/binary_translation/Instructions/Branch.h b/src/binary_translation/Instructions/Branch.h new file mode 100644 index 000000000..7f75d660c --- /dev/null +++ b/src/binary_translation/Instructions/Branch.h @@ -0,0 +1,19 @@ +#include "Instruction.h" +#include "Types.h" + +class Branch : public Instruction +{ +public: + enum class Form + { + Immediate, Register + }; + + bool Decode() override; + void GenerateInstructionCode(InstructionBlock* instruction_block) override; +private: + Form form; + bool link; + u32 imm24; + Register rm; +}; \ No newline at end of file diff --git a/src/binary_translation/Instructions/Instruction.cpp b/src/binary_translation/Instructions/Instruction.cpp new file mode 100644 index 000000000..95101928c --- /dev/null +++ b/src/binary_translation/Instructions/Instruction.cpp @@ -0,0 +1,108 @@ +#include "Instruction.h" +#include "common/logging/log.h" +#include +#include "InstructionBlock.h" +#include "ModuleGen.h" +#include "MachineState.h" +#include "BinarySearch.h" + +Instruction::Instruction() +{ +} + +Instruction::~Instruction() +{ +} + +bool Instruction::Read(u32 instruction, u32 address) +{ + this->instruction = instruction; + this->address = address; + // Call the read of derived class + if (!Decode()) return false; + + if (cond == Condition::Invalid) return false; + return true; +} + +void Instruction::GenerateCode(InstructionBlock *instruction_block) +{ + auto ir_builder = instruction_block->Module()->IrBuilder(); + + if (cond == Condition::AL) + { + GenerateInstructionCode(instruction_block); + } + else + { + auto pred = instruction_block->Module()->Machine()->ConditionPassed(cond); + auto passed_block = instruction_block->CreateBasicBlock("Passed"); + auto not_passed_block = instruction_block->CreateBasicBlock("NotPassed"); + + ir_builder->CreateCondBr(pred, passed_block, not_passed_block); + + ir_builder->SetInsertPoint(passed_block); + 
GenerateInstructionCode(instruction_block); + // If the basic block is terminated there has been a jump + // If not, jump to the next not passed block (which will jump to the next instruction) + if (!ir_builder->GetInsertBlock()->getTerminator()) + { + ir_builder->CreateBr(not_passed_block); + } + + ir_builder->SetInsertPoint(not_passed_block); + } + // If the basic block is terminated there has been a jump + // If not, jump to the next instruction + if (!ir_builder->GetInsertBlock()->getTerminator()) + { + instruction_block->Module()->BranchWritePCConst(instruction_block, Address() + 4); + } +} + +bool Instruction::ReadFields(const std::initializer_list &fields) +{ + size_t total_bit_count = 0; + auto current_instruction = instruction; + + for (auto &field : fields) + { + total_bit_count += field.BitCount(); + // Read the upper bits + auto value = current_instruction >> (32 - field.BitCount()); + if (!field.Read(value)) return false; + // Remove the upper bits + current_instruction <<= field.BitCount(); + } + assert(total_bit_count == 32); + + return true; +} + +Instruction::FieldDefObject Instruction::CondDef() +{ + return FieldDef<4>(&cond); +} + +Instruction::FieldDefObject::FieldDefObject(u32 bit_count, u32 const_value) + : bit_count(bit_count), const_value(const_value), constant(true) +{ +} + +Instruction::FieldDefObject::FieldDefObject(u32 bit_count, void* field_address, WriteFunctionType write_function) + : bit_count(bit_count), field_address(field_address), write_function(write_function), constant(false) +{ +} + +bool Instruction::FieldDefObject::Read(u32 value) const +{ + if (constant) + { + return value == const_value; + } + else + { + write_function(value, field_address); + return true; + } +} \ No newline at end of file diff --git a/src/binary_translation/Instructions/Instruction.h b/src/binary_translation/Instructions/Instruction.h new file mode 100644 index 000000000..361a8a711 --- /dev/null +++ b/src/binary_translation/Instructions/Instruction.h @@ 
-0,0 +1,111 @@ +#pragma once +#include "common/common_types.h" +#include +#include "Types.h" + +class InstructionBlock; + +class Instruction +{ +protected: + class FieldDefObject; +public: + Instruction(); + virtual ~Instruction(); + + /* + * Reads the instruction. + * Returns true on success, or false otherwise + */ + bool Read(u32 instruction, u32 address); + + /* + * Generates non instruction specific code, and then calls GenerateInstructionCode + */ + void GenerateCode(InstructionBlock *instruction_block); + + u32 Address() const { return address; } +protected: + /* + * Derived classes must override this, and implement it by calling ReadFields + */ + virtual bool Decode() = 0; + /* + * Generates code for the instruction into the instruction block + * Derived classes must override this + */ + virtual void GenerateInstructionCode(InstructionBlock *instruction_block) = 0; + /* + * Reads fields from the instruction + * The fields come most significant first + */ + bool ReadFields(const std::initializer_list &fields); + + /* + * Creates a field definition for a constant + */ + template + static FieldDefObject FieldDef(u32 value); + /* + * Creates a field definition for a field + */ + template + static FieldDefObject FieldDef(Type *field); + /* + * Creates a field definition for the condition field + */ + FieldDefObject CondDef(); +private: + /* + * Function used by FieldDefObject to write to a field + */ + template + static void WriteFunction(u32 value, void *field_address); + + // Instruction value + u32 instruction; + // Instruction address + u32 address; + + Condition cond; +}; + +/* + * Object produced by FieldDef + */ +class Instruction::FieldDefObject +{ +public: + typedef void(*WriteFunctionType)(u32 value, void *field_address); +public: + // Constant + FieldDefObject(u32 bit_count, u32 const_value); + // Field + FieldDefObject(u32 bit_count, void *field_address, WriteFunctionType write_function); + bool Read(u32 value) const; + u32 BitCount() const { return 
bit_count; } +private: + u32 bit_count; + u32 const_value; + void *field_address; + WriteFunctionType write_function; + bool constant; +}; + +template +Instruction::FieldDefObject Instruction::FieldDef(u32 value) +{ + return FieldDefObject(BitCount, value); +} + +template +Instruction::FieldDefObject Instruction::FieldDef(Type* field) +{ + return FieldDefObject(BitCount, field, &Instruction::WriteFunction); +} + +template +void Instruction::WriteFunction(u32 value, void *field_address) +{ + *static_cast(field_address) = Type(value); +} \ No newline at end of file diff --git a/src/binary_translation/Instructions/Ldr.cpp b/src/binary_translation/Instructions/Ldr.cpp new file mode 100644 index 000000000..8fda9c1a5 --- /dev/null +++ b/src/binary_translation/Instructions/Ldr.cpp @@ -0,0 +1,107 @@ +#include "Ldr.h" +#include "Disassembler.h" +#include "InstructionBlock.h" +#include +#include +#include +#include "MachineState.h" + +static RegisterInstruction register_instruction; + +bool Ldr::Decode() +{ // Tries the PC-relative, immediate-offset and multi-register encodings in turn; returns false when none is accepted + if (ReadFields({ CondDef(), FieldDef<4>(5), FieldDef<1>(&U), FieldDef<7>(0x1f), + FieldDef<4>(&rt), FieldDef<12>(&imm12)})) + { + form = Form::PC; + return true; + } + if (ReadFields({ CondDef(), FieldDef<3>(2), FieldDef<1>(&P), FieldDef<1>(&U), FieldDef<1>(0), FieldDef<1>(&W), FieldDef<1>(1), FieldDef<4>(&rn), + FieldDef<4>(&rt), FieldDef<12>(&imm12) })) + { + form = Form::Reg; + + if (!P && W) return false; // SEE LDRT; + //if (rn == Register::SP && !P && U && !W && imm12 == 4) return false; // SEE POP; + if ((!P || W) && rn == rt) return false; // UNPREDICTABLE; + + return true; + } + if (ReadFields({ CondDef(), FieldDef<6>(0x22), FieldDef<1>(&W), FieldDef<1>(1), FieldDef<4>(&rn), + FieldDef<16>(&register_list) })) + { + form = Form::MultiReg; + + //if (W && rn == Register::SP && register_list.count() > 1) return false; // SEE POP (ARM); + if (rn == Register::PC || register_list.none()) return false; // UNPREDICTABLE; none() tests for an empty list (bitset::size() is always 16) + if (W && register_list[(u32)rn]) return false; 
// UNPREDICTABLE; + + return true; + } + return false; +} + +void Ldr::GenerateInstructionCode(InstructionBlock* instruction_block) +{ + auto ir_builder = instruction_block->IrBuilder(); + + if (form != Form::MultiReg) + { + llvm::Value *address = nullptr; + llvm::Value *value = nullptr; + + auto add = (bool)U; + + if (form == Form::PC) + { + auto base = instruction_block->Address() + 8; + auto constAddress = add ? base + imm12 : base - imm12; + auto constAddressEnd = constAddress + 4; + // If the value is read only, inline it + if (constAddress >= Loader::ROMCodeStart && constAddressEnd <= (Loader::ROMCodeStart + Loader::ROMCodeSize) || + constAddress >= Loader::ROMReadOnlyDataStart && constAddressEnd <= (Loader::ROMReadOnlyDataStart + Loader::ROMReadOnlyDataSize)) + { + value = ir_builder->getInt32(Memory::Read32(constAddress)); + } + else + { + address = ir_builder->getInt32(constAddress); + } + } + else + { + auto index = P == 1; + auto wback = P == 0 || W == 1; + auto source_register = instruction_block->Read(rn); + auto imm32 = ir_builder->getInt32(add ? imm12 : -imm12); + + auto offset_address = ir_builder->CreateAdd(source_register, imm32); + address = index ? 
offset_address : source_register; + if (wback) + instruction_block->Write(rn, offset_address); + } + + if (!value) value = instruction_block->Module()->Machine()->ReadMemory32(address); + instruction_block->Write(rt, value); + + if (rt == Register::PC) + instruction_block->Module()->BranchReadPC(); + } + else + { + auto wback = (bool)W; + auto address = instruction_block->Read(rn); + for (auto i = 0; i < 16; ++i) + { + if (!register_list[i]) continue; + instruction_block->Write((Register)i, instruction_block->Module()->Machine()->ReadMemory32(address)); + address = ir_builder->CreateAdd(address, ir_builder->getInt32(4)); + } + + if (wback) + instruction_block->Write(rn, address); + + if (register_list[15]) + instruction_block->Module()->BranchReadPC(); + } +} \ No newline at end of file diff --git a/src/binary_translation/Instructions/Ldr.h b/src/binary_translation/Instructions/Ldr.h new file mode 100644 index 000000000..d4716d659 --- /dev/null +++ b/src/binary_translation/Instructions/Ldr.h @@ -0,0 +1,25 @@ +#include "Instruction.h" +#include "Types.h" +#include + +class Ldr : public Instruction +{ +public: + enum class Form + { + PC, Reg, MultiReg + }; + + bool Decode() override; + void GenerateInstructionCode(InstructionBlock* instruction_block) override; + +private: + Form form; + bool U; + Register rt; + u32 imm12; + bool P; + bool W; + Register rn; + std::bitset<16> register_list; +}; \ No newline at end of file diff --git a/src/binary_translation/Instructions/MovShift.cpp b/src/binary_translation/Instructions/MovShift.cpp new file mode 100644 index 000000000..fc00acfcb --- /dev/null +++ b/src/binary_translation/Instructions/MovShift.cpp @@ -0,0 +1,101 @@ +#include "MovShift.h" +#include "Disassembler.h" +#include "InstructionBlock.h" +#include "ModuleGen.h" +#include "ARMFuncs.h" +#include + +static RegisterInstruction register_instruction; + +bool MovShift::Decode() +{ + if (ReadFields({ CondDef(), FieldDef<3>(0), FieldDef<4>(13), FieldDef<1>(&s), 
FieldDef<4>(0), + FieldDef<4>(&rd), FieldDef<5>(&imm5), FieldDef<2>(&op2), FieldDef<1>(0), FieldDef<4>(&rm) })) + { + form = Form::Register; + if (rm == Register::PC) return false; + if (rd == Register::PC && s) return false; // SEE SUBS PC, LR and related instructions; + return true; + } + if (ReadFields({ CondDef(), FieldDef<7>(0x1d), FieldDef<1>(&s), FieldDef<4>(0), + FieldDef<4>(&rd), FieldDef<12>(&imm12) })) + { + form = Form::ImmediateA1; + return true; + } + if (ReadFields({ CondDef(), FieldDef<8>(0x30), FieldDef<4>(&imm4), + FieldDef<4>(&rd), FieldDef<12>(&imm12) })) + { + s = false; + form = Form::ImmediateA2; + if (rd == Register::PC) return false; // UNPREDICTIBLE + return true; + } + return false; +} + +void MovShift::GenerateInstructionCode(InstructionBlock* instruction_block) +{ + auto ir_builder = instruction_block->IrBuilder(); + + auto carry_in = instruction_block->Read(Register::C); + ARMFuncs::ResultCarry result = {}; + + switch (form) + { + case Form::Register: + result = { instruction_block->Read(rm), carry_in }; + switch (op2) + { + case Op2Type::MoveAndLSL: + if (imm5 != 0) + { + result = ARMFuncs::Shift_C(instruction_block, result.result, ARMFuncs::SRType::LSL, + ARMFuncs::DecodeImmShift(instruction_block, 0, imm5).amount, result.carry); + } + break; + case Op2Type::LSR: + result = ARMFuncs::Shift_C(instruction_block, result.result, ARMFuncs::SRType::LSR, + ARMFuncs::DecodeImmShift(instruction_block, 1, imm5).amount, result.carry); + break; + case Op2Type::ASR: + result = ARMFuncs::Shift_C(instruction_block, result.result, ARMFuncs::SRType::ASR, + ARMFuncs::DecodeImmShift(instruction_block, 2, imm5).amount, result.carry); + break; + case Op2Type::RRXAndROR: + if (imm5 == 0) + { + result = ARMFuncs::Shift_C(instruction_block, result.result, ARMFuncs::SRType::RRX, + ir_builder->getInt32(1), result.carry); + } + else + { + result = ARMFuncs::Shift_C(instruction_block, result.result, ARMFuncs::SRType::ROR, + 
ARMFuncs::DecodeImmShift(instruction_block, 3, imm5).amount, result.carry); + } + break; + } + break; + case Form::ImmediateA1: + result = ARMFuncs::ARMExpandImm_C(instruction_block, imm12, carry_in); + break; + case Form::ImmediateA2: + result.result = ir_builder->getInt32((imm4 << 12) | imm12); + break; + } + + instruction_block->Write(rd, result.result); + + if (s) + { + instruction_block->Write(Register::N, ir_builder->CreateTrunc(ir_builder->CreateLShr(result.result, 31), ir_builder->getInt1Ty())); + instruction_block->Write(Register::Z, ir_builder->CreateICmpEQ(result.result, ir_builder->getInt32(0))); + if (result.carry != carry_in) + instruction_block->Write(Register::C, result.carry); + } + + if (rd == Register::PC) + { + instruction_block->Module()->BranchReadPC(); + } +} \ No newline at end of file diff --git a/src/binary_translation/Instructions/MovShift.h b/src/binary_translation/Instructions/MovShift.h new file mode 100644 index 000000000..cbe87ed72 --- /dev/null +++ b/src/binary_translation/Instructions/MovShift.h @@ -0,0 +1,33 @@ +#include "Instruction.h" +#include "Types.h" + +/* + * Data processing instructions + * ARMv7-A 5.2.1 (register), 5.2.2 (register-shifted register, 5.2.3 (immediate) + */ + +class MovShift : public Instruction +{ +public: + enum class Op2Type + { + MoveAndLSL, LSR, ASR, RRXAndROR + }; + enum class Form + { + Register, ImmediateA1, ImmediateA2 + }; + + bool Decode() override; + void GenerateInstructionCode(InstructionBlock* instruction_block) override; +private: + Form form; + bool s; + Register rn; + Register rd; + Register rm; + u32 imm12; + u32 imm5; + u32 imm4; + Op2Type op2; +}; \ No newline at end of file diff --git a/src/binary_translation/Instructions/Str.cpp b/src/binary_translation/Instructions/Str.cpp new file mode 100644 index 000000000..f771b52db --- /dev/null +++ b/src/binary_translation/Instructions/Str.cpp @@ -0,0 +1,70 @@ +#include "Str.h" +#include "Disassembler.h" +#include "InstructionBlock.h" +#include 
+#include +#include +#include "MachineState.h" + +static RegisterInstruction register_instruction; + +bool Str::Decode() +{ // Tries the immediate-offset and multi-register store encodings in turn; returns false when neither is accepted + if (ReadFields({ CondDef(), FieldDef<3>(2), FieldDef<1>(&P), FieldDef<1>(&U), FieldDef<1>(0), FieldDef<1>(&W), FieldDef<1>(0), FieldDef<4>(&rn), + FieldDef<4>(&rt), FieldDef<12>(&imm12) })) + { + form = Form::Immediate; + + if (!P && W) return false; // SEE STRT; + if ((!P || W) && (rn == rt || rn == Register::PC)) return false; // UNPREDICTABLE; + if (rn == Register::PC) return false; // Currently unimplemented + + return true; + } + if (ReadFields({ CondDef(), FieldDef<6>(0x24), FieldDef<1>(&W), FieldDef<1>(0), FieldDef<4>(&rn), + FieldDef<16>(&register_list) })) + { + form = Form::MultiReg; + + if (rn == Register::PC || register_list.none()) return false; // UNPREDICTABLE; none() tests for an empty list (bitset::size() is always 16) + if (register_list[(int)Register::PC]) return false; // Currently unimplemented + + return true; + } + return false; +} + +void Str::GenerateInstructionCode(InstructionBlock* instruction_block) +{ + auto ir_builder = instruction_block->IrBuilder(); + + if (form == Form::Immediate) + { + auto add = U == 1; + auto index = P == 1; + auto wback = P == 0 || W == 1; + auto source_register = instruction_block->Read(rn); + auto imm32 = ir_builder->getInt32(add ? imm12 : -imm12); + + auto offset_address = ir_builder->CreateAdd(source_register, imm32); + auto address = index ? 
offset_address : source_register; + if (wback) + instruction_block->Write(rn, offset_address); + instruction_block->Module()->Machine()->WriteMemory32(address, instruction_block->Read(rt)); + } + else + { + auto wback = W == 1; + auto write_back_address = ir_builder->CreateSub(instruction_block->Read(rn), ir_builder->getInt32(4 * register_list.count())); + auto address = write_back_address; + for (auto i = 0; i < 16; ++i) + { + if (!register_list[i]) continue; + instruction_block->Module()->Machine()->WriteMemory32(address, instruction_block->Read((Register)i)); + address = ir_builder->CreateAdd(address, ir_builder->getInt32(4)); + } + + if (wback) + instruction_block->Write(rn, write_back_address); + } +} \ No newline at end of file diff --git a/src/binary_translation/Instructions/Str.h b/src/binary_translation/Instructions/Str.h new file mode 100644 index 000000000..c66b821a6 --- /dev/null +++ b/src/binary_translation/Instructions/Str.h @@ -0,0 +1,25 @@ +#include "Instruction.h" +#include "Types.h" +#include + +class Str : public Instruction +{ +public: + enum class Form + { + Immediate, Reg, MultiReg + }; + + bool Decode() override; + void GenerateInstructionCode(InstructionBlock* instruction_block) override; + +private: + Form form; + bool U; + Register rt; + u32 imm12; + bool P; + bool W; + Register rn; + std::bitset<16> register_list; +}; \ No newline at end of file diff --git a/src/binary_translation/Instructions/Types.h b/src/binary_translation/Instructions/Types.h new file mode 100644 index 000000000..365ef70df --- /dev/null +++ b/src/binary_translation/Instructions/Types.h @@ -0,0 +1,19 @@ +#pragma once + +/* + * A register in a broad sense: R0-R15, and flags + */ +enum class Register +{ + R0, R1, R2, R3, R4, R5, R6, R7, + R8, R9, R10, R11, R12, SP, LR, PC, + N, Z, C, V, + Count +}; + +static const size_t RegisterCount = static_cast(Register::Count); + +enum class Condition +{ + EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, Invalid +}; \ No 
newline at end of file diff --git a/src/binary_translation/MachineState.cpp b/src/binary_translation/MachineState.cpp new file mode 100644 index 000000000..9e17bcbda --- /dev/null +++ b/src/binary_translation/MachineState.cpp @@ -0,0 +1,169 @@ +#include "MachineState.h" +#include "ModuleGen.h" +#include "Instructions/Types.h" +#include +#include +#include +#include +#include "TBAA.h" + +using namespace llvm; + +MachineState::MachineState(ModuleGen *module) : module(module) +{ +} + +void MachineState::GenerateGlobals() +{ + auto ir_builder = module->IrBuilder(); + +#if ARCHITECTURE_x86_64 + auto registers_global_initializer = ConstantPointerNull::get(IntegerType::getInt64PtrTy(getGlobalContext())); +#else + auto registers_global_initializer = ConstantPointerNull::get(IntegerType::getInt32PtrTy(getGlobalContext())); +#endif + registers_global = new GlobalVariable(*module->Module(), registers_global_initializer->getType(), + false, GlobalValue::ExternalLinkage, registers_global_initializer, "Registers"); + + // Flags is stored internally as i1* indexed in multiples of 4 + auto flags_global_initializer = ConstantPointerNull::get(IntegerType::getInt1PtrTy(getGlobalContext())); + flags_global = new GlobalVariable(*module->Module(), flags_global_initializer->getType(), + false, GlobalValue::ExternalLinkage, flags_global_initializer, "Flags"); + + + auto memory_read_32_signature = FunctionType::get(ir_builder->getInt32Ty(), ir_builder->getInt32Ty(), false); + auto memory_read_32_ptr = PointerType::get(memory_read_32_signature, 0); + auto memory_read_32_initializer = ConstantPointerNull::get(memory_read_32_ptr); + memory_read_32_global = new GlobalVariable(*module->Module(), memory_read_32_ptr, + false, GlobalValue::ExternalLinkage, memory_read_32_initializer, "Memory::Read32"); + + + llvm::Type *memory_write_32_args[] = { ir_builder->getInt32Ty(), ir_builder->getInt32Ty() }; + auto memory_write_32_signature = FunctionType::get(ir_builder->getVoidTy(), memory_write_32_args, 
false); + auto memory_write_32_ptr = PointerType::get(memory_write_32_signature, 0); + auto memory_write_32_initializer = ConstantPointerNull::get(memory_write_32_ptr); + memory_write_32_global = new GlobalVariable(*module->Module(), memory_write_32_ptr, + false, GlobalValue::ExternalLinkage, memory_write_32_initializer, "Memory::Write32"); +} + +Value *MachineState::GetRegisterPtr(Register reg) +{ + Value *global; + unsigned index; + if (reg <= Register::PC) + { + global = registers_global; + index = static_cast(reg)-static_cast(Register::R0); + } + else + { + + global = flags_global; + index = (static_cast(reg)-static_cast(Register::N)) * 4; + } + auto base = module->IrBuilder()->CreateAlignedLoad(global, 4); + module->GetTBAA()->TagConst(base); +#if ARCHITECTURE_x86_64 + return module->IrBuilder()->CreateConstInBoundsGEP1_64(base, index); +#else + return module->IrBuilder()->CreateConstInBoundsGEP1_32(base->getType(),base, index); +#endif +} + +Value* MachineState::ReadRegiser(Register reg, bool allow_pc,bool full_length) +{ + assert(allow_pc || reg != Register::PC); +#if ARCHITECTURE_x86_64 + Value* load; + if (reg == Register::PC&&full_length) { + load = module->IrBuilder()->CreateAlignedLoad(GetRegisterPtr(reg), 4); + module->GetTBAA()->TagRegister(static_cast(load), reg); + } + else if(reg <= Register::PC){ + auto loadinst = module->IrBuilder()->CreateAlignedLoad(GetRegisterPtr(reg), 4); + module->GetTBAA()->TagRegister(loadinst, reg); + load = module->IrBuilder()->CreateIntCast(loadinst, module->IrBuilder()->getInt32Ty(),false); + } + else { + auto loadinst = module->IrBuilder()->CreateAlignedLoad(GetRegisterPtr(reg), 4); + module->GetTBAA()->TagRegister(loadinst, reg); + load = module->IrBuilder()->CreateIntCast(loadinst, module->IrBuilder()->getInt1Ty(), false); + } +#else + auto load = module->IrBuilder()->CreateAlignedLoad(GetRegisterPtr(reg), 4); + module->GetTBAA()->TagRegister(load, reg); +#endif + return load; +} + +Value* 
MachineState::WriteRegiser(Register reg, Value *value,bool full_length) +{ +#if ARCHITECTURE_x86_64 + Instruction* store; + if (reg == Register::PC && full_length) + store = module->IrBuilder()->CreateAlignedStore(value, GetRegisterPtr(reg), 4); + else if(reg <= Register::PC) + store = module->IrBuilder()->CreateAlignedStore(module->IrBuilder()->CreateIntCast(value, module->IrBuilder()->getInt64Ty(), false), GetRegisterPtr(reg), 4); + else + store = module->IrBuilder()->CreateAlignedStore(module->IrBuilder()->CreateIntCast(value, module->IrBuilder()->getInt1Ty(), false), GetRegisterPtr(reg), 4); +#else + auto store = module->IrBuilder()->CreateAlignedStore(value, GetRegisterPtr(reg), 4); +#endif + module->GetTBAA()->TagRegister(store, reg); + return store; +} + +Value* MachineState::ConditionPassed(Condition cond) +{ // Emits IR computing the predicate for cond from the N/Z/C/V flag reads and returns that value + auto ir_builder = module->IrBuilder(); + Value *pred = nullptr; + auto negate = false; // 'not' is a reserved alternative token in ISO C++, so it cannot name a variable + switch (cond) + { + case Condition::NE: case Condition::CC: case Condition::PL: case Condition::VC: + case Condition::LS: case Condition::LT: case Condition::LE: + negate = true; + cond = (Condition)((int)cond - 1); // step back to the preceding enumerator; its predicate is inverted below + } + + switch (cond) + { + case Condition::EQ: pred = ReadRegiser(Register::Z); break; + case Condition::CS: pred = ReadRegiser(Register::C); break; + case Condition::MI: pred = ReadRegiser(Register::N); break; + case Condition::VS: pred = ReadRegiser(Register::V); break; + case Condition::HI: pred = ir_builder->CreateAnd(ReadRegiser(Register::C), ir_builder->CreateNot(ReadRegiser(Register::Z))); break; + case Condition::GE: pred = ir_builder->CreateICmpEQ(ReadRegiser(Register::N), ReadRegiser(Register::V)); break; + case Condition::GT: pred = ir_builder->CreateAnd(ir_builder->CreateNot(ReadRegiser(Register::Z)), + ir_builder->CreateICmpEQ(ReadRegiser(Register::N), ReadRegiser(Register::V))); break; + case Condition::AL: pred = ir_builder->getInt1(true); break; // break added: AL used to fall through into the assert below + default: assert(false && "Invalid condition"); // assert takes a single expression + } + + if (negate) pred = ir_builder->CreateNot(pred); + 
return pred; +} + +llvm::Value* MachineState::ReadMemory32(llvm::Value* address) +{ + auto ir_builder = module->IrBuilder(); + + auto memory_read_32 = ir_builder->CreateLoad(memory_read_32_global); + module->GetTBAA()->TagConst(memory_read_32); + auto call = ir_builder->CreateCall(memory_read_32,address); + call->setOnlyReadsMemory(); + module->GetTBAA()->TagMemory(call); + return call; +} + +llvm::Value* MachineState::WriteMemory32(llvm::Value* address, llvm::Value* value) +{ + auto ir_builder = module->IrBuilder(); + + auto memory_write_32 = ir_builder->CreateLoad(memory_write_32_global); + module->GetTBAA()->TagConst(memory_write_32); + + auto call = ir_builder->CreateCall(memory_write_32, llvm::ArrayRef(std::vector{ address, value })); + module->GetTBAA()->TagMemory(call); + return value; +} \ No newline at end of file diff --git a/src/binary_translation/MachineState.h b/src/binary_translation/MachineState.h new file mode 100644 index 000000000..92c66cc1a --- /dev/null +++ b/src/binary_translation/MachineState.h @@ -0,0 +1,59 @@ +#pragma once + +enum class Condition; +enum class Register; +class ModuleGen; + +namespace llvm +{ + class Value; + class Instruction; + class GlobalVariable; +} + +/* +Contains all the machine state: + Registers, Flags, Memory +*/ +class MachineState +{ +public: + MachineState(ModuleGen *module); + + void GenerateGlobals(); + // allow_pc exists because most of the times reading the PC is not what the instruction meant + llvm::Value *ReadRegiser(Register reg, bool allow_pc = false,bool full_length = false); + llvm::Value *WriteRegiser(Register reg, llvm::Value *value, bool full_length = false); + llvm::Value* ConditionPassed(Condition cond); + llvm::Value* ReadMemory32(llvm::Value* address); + llvm::Value* WriteMemory32(llvm::Value* address, llvm::Value* value); +private: + // Returns the address of a register or a flag + llvm::Value *GetRegisterPtr(Register reg); + + ModuleGen *module; + + /* + * u32 *Registers; + * The registers of 
the cpu + */ + llvm::GlobalVariable *registers_global; + /* + * u32 *Flags; + * The flags of the cpu + * Orderered N, Z, C, V + */ + llvm::GlobalVariable *flags_global; + + /* + * u32 (u32) Memory::Read32 + * Reads the memory at address + */ + llvm::GlobalVariable *memory_read_32_global; + + /* + * void (u32, u32) Memory::Write32 + * Writes the memory at address + */ + llvm::GlobalVariable *memory_write_32_global; +}; \ No newline at end of file diff --git a/src/binary_translation/ModuleGen.cpp b/src/binary_translation/ModuleGen.cpp new file mode 100644 index 000000000..df0c5fb28 --- /dev/null +++ b/src/binary_translation/ModuleGen.cpp @@ -0,0 +1,308 @@ +#include "ModuleGen.h" +#include "Disassembler.h" +#include "core/loader/loader.h" +#include "core/mem_map.h" +#include "Instructions/Instruction.h" +#include "Instructions/Types.h" +#include "InstructionBlock.h" +#include "common/logging/log.h" +#include +#include +#include +#include +#include "MachineState.h" +#include "TBAA.h" +#include "BlockColors.h" + +using namespace llvm; + +ModuleGen::ModuleGen(llvm::Module* module, bool verify) + : module(module), + verify(verify) +{ + ir_builder = make_unique>(getGlobalContext()); + machine = make_unique(this); + tbaa = make_unique(); + block_colors = make_unique(this); +} + +ModuleGen::~ModuleGen() +{ +} + +void ModuleGen::Run() +{ + tbaa->GenerateTags(); + GenerateGlobals(); + + DecodeInstructions(); + GenerateInstructionsEntry(); + + GenerateCanRunFunction(); + GenerateRunFunction(); + GenerateGetBlockAddressFunction(); + + GenerateInstructionsCode(); + + ColorBlocks(); + GenerateBlockAddressArray(); +} + +void ModuleGen::GenerateIncInstructionCount() +{ + auto load = ir_builder->CreateLoad(instruction_count); + auto inc = ir_builder->CreateAdd(load, ir_builder->getInt32(1)); + auto store = ir_builder->CreateStore(inc, instruction_count); + tbaa->TagInstructionCount(load); + tbaa->TagInstructionCount(store); +} + +void ModuleGen::BranchReadPC() +{ + if (verify) + { + 
ir_builder->CreateRetVoid(); + } + else + { + auto call = ir_builder->CreateCall(run_function); + call->setTailCall(); + ir_builder->CreateRetVoid(); + } +} + +void ModuleGen::BranchWritePCConst(InstructionBlock *current, u64 pc) +{ + if (verify) + { + // Just write PC and exit on verify + machine->WriteRegiser(Register::PC, ir_builder->getInt32(pc)); + ir_builder->CreateRetVoid(); + } + else + { + auto i = instruction_blocks_by_pc.find(pc); + if (i != instruction_blocks_by_pc.end()) + { + // Found instruction, jump to it + ir_builder->CreateBr(i->second->GetEntryBasicBlock()); + InstructionBlock::Link(i->second, current); + } + else + { + // Didn't find instruction, write PC and exit + machine->WriteRegiser(Register::PC, ir_builder->getInt32(pc)); + ir_builder->CreateRetVoid(); + } + } +} + +void ModuleGen::GenerateGlobals() +{ + machine->GenerateGlobals(); + + auto function_pointer = PointerType::get(block_colors->GetFunctionType(), 0); + block_address_type = StructType::get(function_pointer, ir_builder->getInt32Ty(), nullptr); + block_address_not_present = ConstantStruct::get(block_address_type, ConstantPointerNull::get(function_pointer), ir_builder->getInt32(0), nullptr); + +#if ARCHITECTURE_x86_64 + auto get_block_address_function_type = FunctionType::get(block_address_type, ir_builder->getInt64Ty(), false); +#else + auto get_block_address_function_type = FunctionType::get(block_address_type, ir_builder->getInt32Ty(), false); +#endif + get_block_address_function = Function::Create(get_block_address_function_type, GlobalValue::PrivateLinkage, "GetBlockAddress", module); + + auto can_run_function_type = FunctionType::get(ir_builder->getInt1Ty(), false); + can_run_function = Function::Create(can_run_function_type, GlobalValue::ExternalLinkage, "CanRun", module); + + auto run_function_type = FunctionType::get(ir_builder->getVoidTy(), false); + run_function = Function::Create(run_function_type, GlobalValue::ExternalLinkage, "Run", module); + + 
block_address_array_base = Loader::ROMCodeStart / 4; + block_address_array_size = Loader::ROMCodeSize / 4; + + block_address_array_type = ArrayType::get(block_address_type, block_address_array_size); + block_address_array = new GlobalVariable(*module, block_address_array_type, true, GlobalValue::ExternalLinkage, nullptr, "BlockAddressArray"); + + // bool Verify - contains the value of verify for citra usage + new GlobalVariable(*module, ir_builder->getInt1Ty(), true, GlobalValue::ExternalLinkage, ir_builder->getInt1(verify), "Verify"); + + instruction_count = new GlobalVariable(*Module(), ir_builder->getInt32Ty(), false, GlobalValue::ExternalLinkage, + ir_builder->getInt32(0), "InstructionCount"); +} + +void ModuleGen::GenerateBlockAddressArray() +{ + auto local_block_address_array_values = std::make_unique(block_address_array_size); + std::fill( + local_block_address_array_values.get(), + local_block_address_array_values.get() + block_address_array_size, + block_address_not_present); + + /*for (auto i = 0; i < instruction_blocks.size(); ++i) + { + auto &block = instruction_blocks[i]; + auto entry_basic_block = block->GetEntryBasicBlock(); + auto index = block->Address() / 4 - block_address_array_base; + auto color_index = 0; + local_block_address_array_values[index] = BConst + }*/ + for (auto color = 0; color < block_colors->GetColorCount(); ++color) + { + auto function = block_colors->GetColorFunction(color); + for (auto i = 0; i < block_colors->GetColorInstructionCount(color); ++i) + { + auto block = block_colors->GetColorInstruction(color, i); + auto index = block->Address() / 4 - block_address_array_base; + auto value = ConstantStruct::get(block_address_type, function, ir_builder->getInt32(i), nullptr); + local_block_address_array_values[index] = value; + } + } + + auto local_block_address_array_values_ref = ArrayRef(local_block_address_array_values.get(), block_address_array_size); + auto local_blocks_address_array = 
ConstantArray::get(block_address_array_type, local_block_address_array_values_ref); + block_address_array->setInitializer(local_blocks_address_array); +} + +void ModuleGen::GenerateGetBlockAddressFunction() +{ + /* + entry_basic_block: + auto index = (pc - block_address_array_base) / 4; + if(((pc & 3) == 0) && index < block_address_array_size) + { + index_in_bounds_basic_block: + return block_address_array[index]; + } + else + { + index_out_of_bounds_basic_block: + return nullptr; + } + */ + auto pc = &*get_block_address_function->arg_begin(); + auto entry_basic_block = BasicBlock::Create(getGlobalContext(), "Entry", get_block_address_function); + auto index_in_bounds_basic_block = BasicBlock::Create(getGlobalContext(), "IndexInBounds", get_block_address_function); + auto index_out_of_bounds_basic_block = BasicBlock::Create(getGlobalContext(), "IndexOutOfBounds", get_block_address_function); + + ir_builder->SetInsertPoint(entry_basic_block); +#define _BITAPPEND(a,b) a ## b +#if ARCHITECTURE_x86_64 +#define BITAPPEND(c) _BITAPPEND(c,64) +#else +#define BITAPPEND(c) _BITAPPEND(c,32) +#endif + + auto index = ir_builder->CreateUDiv(pc, ir_builder->BITAPPEND(getInt)(4), "", true); + index = ir_builder->CreateSub(index, ir_builder->BITAPPEND(getInt)(block_address_array_base)); + auto in_bounds_pred = ir_builder->CreateICmpULT(index, ir_builder->BITAPPEND(getInt)(block_address_array_size)); + auto arm_pred = ir_builder->CreateICmpEQ(ir_builder->CreateAnd(pc, 3), ir_builder->BITAPPEND(getInt)(0)); + auto pred = ir_builder->CreateAnd(in_bounds_pred, arm_pred); + ir_builder->CreateCondBr(pred, index_in_bounds_basic_block, index_out_of_bounds_basic_block); + + ir_builder->SetInsertPoint(index_in_bounds_basic_block); + Value *gep_values[] = { ir_builder->BITAPPEND(getInt)(0), index }; + auto block_address = ir_builder->CreateLoad(ir_builder->CreateInBoundsGEP(block_address_array, gep_values)); + tbaa->TagConst(block_address); + ir_builder->CreateRet(block_address); + + 
ir_builder->SetInsertPoint(index_out_of_bounds_basic_block); + ir_builder->CreateRet(block_address_not_present); +#undef BITAPPEND // #undef takes only the macro name, no parameter list +#undef _BITAPPEND // #undef takes only the macro name, no parameter list +} + +void ModuleGen::GenerateCanRunFunction() +{ + // return GetBlockAddress(Read(PC)).function != nullptr; + auto basic_block = BasicBlock::Create(getGlobalContext(), "Entry", can_run_function); + + ir_builder->SetInsertPoint(basic_block); + auto block_address = ir_builder->CreateCall(get_block_address_function, machine->ReadRegiser(Register::PC, true,true)); + auto function = ir_builder->CreateExtractValue(block_address, 0); + ir_builder->CreateRet(ir_builder->CreateICmpNE(function, + ConstantPointerNull::get(cast(function->getType())))); +} + +void ModuleGen::GenerateRunFunction() +{ + /* + run_function_entry: + auto block = GetBlockAddress(Read(PC)) + if(block_address != nullptr) + { + block_present_basic_block: + block.function(block.index); + return; + } + else + { + block_not_present_basic_block: + return; + } + */ + run_function_entry = BasicBlock::Create(getGlobalContext(), "Entry", run_function); + auto block_present_basic_block = BasicBlock::Create(getGlobalContext(), "BlockPresent", run_function); + auto block_not_present_basic_block = BasicBlock::Create(getGlobalContext(), "BlockNotPresent", run_function); + + ir_builder->SetInsertPoint(run_function_entry); + auto block_address = ir_builder->CreateCall(get_block_address_function, Machine()->ReadRegiser(Register::PC, true,true)); + auto function = ir_builder->CreateExtractValue(block_address, 0); + auto block_present_pred = ir_builder->CreateICmpNE(function, + ConstantPointerNull::get(cast(function->getType()))); + ir_builder->CreateCondBr(block_present_pred, block_present_basic_block, block_not_present_basic_block); + + ir_builder->SetInsertPoint(block_present_basic_block); + auto index = ir_builder->CreateExtractValue(block_address, 1); + auto call = ir_builder->CreateCall(function, index); + call->setTailCall(); + ir_builder->CreateRetVoid(); + 
+ ir_builder->SetInsertPoint(block_not_present_basic_block); + ir_builder->CreateRetVoid(); +} + +void ModuleGen::DecodeInstructions() +{ + size_t generated = 0; + size_t total = 0; + for (auto i = Loader::ROMCodeStart; i - Loader::ROMCodeStart + 4 <= Loader::ROMCodeSize; i += 4) // compare offsets so a ROMCodeSize below 4 (e.g. 0) cannot underflow the bound and loop forever + { + ++total; + auto bytes = Memory::Read32(i); + if (bytes == 0) continue; + auto instruction = Disassembler::Disassemble(bytes, i); + if (instruction == nullptr) continue; + ++generated; + auto instruction_block = std::make_unique(this, instruction.release()); + instruction_blocks_by_pc[i] = instruction_block.get(); + instruction_blocks.push_back(std::move(instruction_block)); + } + + LOG_INFO(BinaryTranslator, "Generated % 8d blocks of % 8d = % 3.1f%%", generated, total, total ? 100.0 * generated / total : 0.0); +} + +void ModuleGen::GenerateInstructionsEntry() +{ + for (auto &instruction : instruction_blocks) + { + instruction->GenerateEntryBlock(); + } +} + +void ModuleGen::GenerateInstructionsCode() +{ + for (auto &instruction : instruction_blocks) + { + instruction->GenerateCode(); + } +} + +void ModuleGen::ColorBlocks() +{ + for (auto &instruction : instruction_blocks) + { + block_colors->AddBlock(instruction.get()); + } + block_colors->GenerateFunctions(); +} \ No newline at end of file diff --git a/src/binary_translation/ModuleGen.h b/src/binary_translation/ModuleGen.h new file mode 100644 index 000000000..5cfdede67 --- /dev/null +++ b/src/binary_translation/ModuleGen.h @@ -0,0 +1,113 @@ +#pragma once +#include +#include +#include + +enum class Register; + +class InstructionBlock; +class MachineState; +class TBAA; +class BlockColors; + +namespace llvm +{ + class Module; +} + +class ModuleGen +{ +public: + /* + * Verify - produce a code that can be verified + * this is done by returning after every opcode + */ + explicit ModuleGen(llvm::Module *module, bool verify); + ~ModuleGen(); + + void Run(); + + void GenerateIncInstructionCount(); + // Generate code to read pc and run all following 
instructions, used in cases of indirect branch + void BranchReadPC(); + // Generate code to write to pc and run all following instructions, used in cases of direct branch + void BranchWritePCConst(InstructionBlock *current, u64 pc); + + llvm::IRBuilder<> *IrBuilder() { return ir_builder.get(); } + llvm::Module *Module() { return module; } + MachineState *Machine() { return machine.get(); } + TBAA *GetTBAA() { return tbaa.get(); } + +private: + // Generates the declarations of all the globals of the module + void GenerateGlobals(); + void GenerateBlockAddressArray(); + void GenerateGetBlockAddressFunction(); + void GenerateCanRunFunction(); + void GenerateRunFunction(); + // Creates InstructionBlock for each instruction + void DecodeInstructions(); + // Generates the entry basic blocks for each instruction + void GenerateInstructionsEntry(); + // Generates the code of each instruction + void GenerateInstructionsCode(); + // Must be run after the instruction code is generated since it depends on the + // inter block jumps + void ColorBlocks(); + + llvm::Module *module; + bool verify; + + std::unique_ptr machine; + std::unique_ptr tbaa; + + std::unique_ptr> ir_builder; + + size_t block_address_array_base; + size_t block_address_array_size; + /* + * struct BlockAddress + * { + * void (*function)(u32 index); + * u32 index; + * } + */ + llvm::StructType *block_address_type; + llvm::Constant *block_address_not_present; + /* + * i8 **BlockAddressArray; + * The array at [i/4 - block_address_array_base] contains the block address for the instruction at i + * or nullptr if it is not decoded + */ + llvm::ArrayType *block_address_array_type; + llvm::GlobalVariable *block_address_array; + /* + * i32 InstructionCount; + * The count of instructions executed + */ + llvm::GlobalVariable *instruction_count; + /* + * i8 *GetBlockAddress(u32 pc) + * Returns the address of the block for the instruction at pc + */ + llvm::Function *get_block_address_function; + /* + * bool CanRun() + * 
Returns whether there is a binary translation available for a PC + */ + llvm::Function *can_run_function; + /* + * void Run() + * Runs binary translated opcodes + */ + llvm::Function *run_function; + llvm::BasicBlock *run_function_entry; + + /* + * All the instruction blocks + */ + std::vector> instruction_blocks; + std::unordered_map instruction_blocks_by_pc; + + std::unique_ptr block_colors; +}; \ No newline at end of file diff --git a/src/binary_translation/TBAA.cpp b/src/binary_translation/TBAA.cpp new file mode 100644 index 000000000..c481e5750 --- /dev/null +++ b/src/binary_translation/TBAA.cpp @@ -0,0 +1,45 @@ +#include "TBAA.h" +#include +#include +#include +#include +#include + +using namespace llvm; + +void TBAA::GenerateTags() +{ + MDBuilder md_builder(getGlobalContext()); + + auto tbaa_root = md_builder.createTBAARoot("Root"); + + for (auto i = 0; i < RegisterCount; ++i) + { + std::stringstream ss; + ss << "Register_" << i; + register_nodes[i] = md_builder.createTBAAScalarTypeNode(ss.str(), tbaa_root); + } + const_node = md_builder.createTBAAScalarTypeNode("Readonly", tbaa_root); + instruction_count_node = md_builder.createTBAAScalarTypeNode("InstructionCount", tbaa_root); + memory_node = md_builder.createTBAAScalarTypeNode("Memory", tbaa_root); +} + +void TBAA::TagRegister(Instruction* instruction, Register reg) +{ + instruction->setMetadata(LLVMContext::MD_tbaa, register_nodes[(int)reg]); +} + +void TBAA::TagConst(Instruction* instruction) +{ + instruction->setMetadata(LLVMContext::MD_tbaa, const_node); +} + +void TBAA::TagInstructionCount(llvm::Instruction* instruction) +{ + instruction->setMetadata(LLVMContext::MD_tbaa, instruction_count_node); +} + +void TBAA::TagMemory(llvm::Instruction* instruction) +{ + instruction->setMetadata(LLVMContext::MD_tbaa, memory_node); +} \ No newline at end of file diff --git a/src/binary_translation/TBAA.h b/src/binary_translation/TBAA.h new file mode 100644 index 000000000..657b4fdde --- /dev/null +++ 
b/src/binary_translation/TBAA.h @@ -0,0 +1,34 @@ +#pragma once +#include "Instructions/Types.h" +#include + +namespace llvm +{ + class Instruction; + class MDNode; +} + +/* +Manages TBAA. +A TBAA type is generated for each register and global. +It is a bit of an abuse of TBAA but because nothing aliases it is a good way +to notify LLVM of it. +*/ + +class TBAA +{ +public: + void GenerateTags(); + + void TagRegister(llvm::Instruction *instruction, Register reg); + void TagConst(llvm::Instruction *instruction); + void TagInstructionCount(llvm::Instruction *instruction); + void TagMemory(llvm::Instruction *instruction); +private: + llvm::MDNode *register_nodes[RegisterCount]; + // Tag for everything that is never written. + // Since it is never written, one tag works + llvm::MDNode *const_node; + llvm::MDNode *instruction_count_node; + llvm::MDNode *memory_node; +}; \ No newline at end of file diff --git a/src/binary_translation/main.cpp b/src/binary_translation/main.cpp new file mode 100644 index 000000000..e03f42691 --- /dev/null +++ b/src/binary_translation/main.cpp @@ -0,0 +1,48 @@ +#include "common/logging/backend.h" +#include "common/logging/text_formatter.h" +#include "common/scope_exit.h" +#include "core/core.h" +#include "core/mem_map.h" +#include "core/hle/kernel/memory.h" +#include "core/loader/loader.h" +#include "codegen.h" +#include + +namespace cl = llvm::cl; + +cl::opt InputFilename(cl::Positional, cl::Required, cl::desc("")); +cl::opt OutputFilename(cl::Positional, cl::Required, cl::desc("")); +cl::opt DebugFilename(cl::Positional, cl::Optional, cl::desc("")); +cl::opt Verify("verify", cl::desc(""), cl::init(false)); + +int main(int argc, const char *const *argv) +{ + // Remove all llvm options + llvm::StringMap& options = cl::getRegisteredOptions(); + for (auto i = options.begin(); i != options.end(); ++i) + { + if (i->getValue() != &InputFilename && i->getValue() != &OutputFilename && i->getValue() != &DebugFilename) + { + 
i->getValue()->setHiddenFlag(cl::Hidden); + } + } + cl::ParseCommandLineOptions(argc, argv); + + auto input_rom = InputFilename.c_str(); + auto output_object = OutputFilename.c_str(); + auto output_debug = DebugFilename.getNumOccurrences() ? DebugFilename.c_str() : nullptr; + bool verify = Verify; + + Core::Init(); + Memory::Init(); + + auto load_result = Loader::LoadFile(input_rom); + if (Loader::ResultStatus::Success != load_result) + { + LOG_CRITICAL(BinaryTranslator, "Failed to load ROM (Error %i)!", load_result); + return -1; + } + + CodeGen code_generator(output_object, output_debug, verify); + code_generator.Run(); +} \ No newline at end of file diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt index e7f8a17f9..89ca6dc3d 100644 --- a/src/citra/CMakeLists.txt +++ b/src/citra/CMakeLists.txt @@ -19,6 +19,9 @@ link_directories(${GLFW_LIBRARY_DIRS}) add_executable(citra ${SRCS} ${HEADERS}) target_link_libraries(citra core video_core common) target_link_libraries(citra ${GLFW_LIBRARIES} ${OPENGL_gl_LIBRARY} inih glad) +if(ENABLE_BINARY_TRANSLATION) + target_link_libraries(citra ${llvm_libs}) +endif() if (MSVC) target_link_libraries(citra getopt) endif() diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index bbf6ae001..e36d5405b 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -83,6 +83,9 @@ target_link_libraries(citra-qt core video_core common qhexedit) target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) +if(ENABLE_BINARY_TRANSLATION) + target_link_libraries(citra-qt ${llvm_libs}) +endif() if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") endif() diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index d186ba8f8..086b3ab39 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -58,7 
+58,8 @@ namespace Log { CLS(Render) \ SUB(Render, Software) \ SUB(Render, OpenGL) \ - CLS(Loader) + CLS(Loader) \ + CLS(BinaryTranslator) // GetClassName is a macro defined by Windows.h, grrr... const char* GetLogClassName(Class log_class) { diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 2d9323a7b..b684a698d 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -74,6 +74,7 @@ enum class Class : ClassType { Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend Loader, ///< ROM loader + BinaryTranslator, ///< Binary translator Count ///< Total number of logging classes }; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 861b711c7..c684efad2 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -253,7 +253,17 @@ set(HEADERS settings.h system.h ) - +if(ENABLE_BINARY_TRANSLATION) + set(SRCS + ${SRCS} + binary_translation/BinaryTranslationLoader.cpp + ) + set(HEADERS + ${HEADERS} + mem_map.h + binary_translation/BinaryTranslationLoader.h + ) +endif() create_directory_groups(${SRCS} ${HEADERS}) add_library(core STATIC ${SRCS} ${HEADERS}) diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 2cff2a26a..1bf03ee89 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -23,6 +23,10 @@ #include "core/arm/skyeye_common/armsupp.h" #include "core/arm/skyeye_common/vfp/vfp.h" +#if ENABLE_BINARY_TRANSLATION +#include "core/binary_translation/BinaryTranslationLoader.h" +#endif + #include "core/gdbstub/gdbstub.h" Common::Profiling::TimingCategory profile_execute("DynCom::Execute"); @@ -36,7 +40,8 @@ enum { CALL = (1 << 4), RET = (1 << 5), END_OF_PAGE = (1 << 6), - THUMB = (1 << 7) + THUMB = (1 << 7), + BINARY_TRANSLATED = (1 << 8) }; #define RM BITS(sht_oper, 0, 3) @@ -3554,6 +3559,12 @@ static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, u32 
addr) { translated: phys_addr += inst_size; +#if ENABLE_BINARY_TRANSLATION + if (BinaryTranslationLoader::CanRun(phys_addr, cpu->TFlag)) + { + inst_base->br = BINARY_TRANSLATED; + } +#endif if ((phys_addr & 0xfff) == 0) { inst_base->br = END_OF_PAGE; } @@ -3585,6 +3596,10 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { GDBStub::BreakpointAddress breakpoint_data; +#if ENABLE_BINARY_TRANSLATION + BinaryTranslationLoader::SetCpuState(cpu); +#endif + #undef RM #undef RS @@ -3621,17 +3636,24 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a // clunky switch statement. +#if ENABLE_BINARY_TRANSLATION +#define BINARY_TRANSLATION_VERIFY_CALLBACK BinaryTranslationLoader::VerifyCallback(); +#else +#define BINARY_TRANSLATION_VERIFY_CALLBACK +#endif #if defined __GNUC__ || defined __clang__ #define GOTO_NEXT_INST \ GDB_BP_CHECK; \ if (num_instrs >= cpu->NumInstrsToExecute) goto END; \ num_instrs++; \ + BINARY_TRANSLATION_VERIFY_CALLBACK \ goto *InstLabel[inst_base->idx] #else #define GOTO_NEXT_INST \ GDB_BP_CHECK; \ if (num_instrs >= cpu->NumInstrsToExecute) goto END; \ num_instrs++; \ + BINARY_TRANSLATION_VERIFY_CALLBACK \ switch(inst_base->idx) { \ case 0: goto VMLA_INST; \ case 1: goto VMLS_INST; \ @@ -3905,6 +3927,9 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { goto END; } } +#if ENABLE_BINARY_TRANSLATION + num_instrs = BinaryTranslationLoader::Run(num_instrs); +#endif if (cpu->TFlag) cpu->Reg[15] &= 0xfffffffe; diff --git a/src/core/binary_translation/BinaryTranslationLoader.cpp b/src/core/binary_translation/BinaryTranslationLoader.cpp new file mode 100644 index 000000000..09c228d00 --- /dev/null +++ b/src/core/binary_translation/BinaryTranslationLoader.cpp @@ -0,0 +1,262 @@ +#include "BinaryTranslationLoader.h" +#include "core/arm/skyeye_common/armstate.h" +#include "core/arm/skyeye_common/arm_regformat.h" +#include "common/logging/log.h" +#include +#include 
+#include +#include +#include +#include +#include + +using namespace llvm; + +bool g_enabled = false; +bool g_verify = false; +ARMul_State *g_state; + +std::unique_ptr g_memory_manager; +std::unique_ptr g_dyld; +std::unique_ptr g_loaded_object_info; + +void(*g_run_function)(); +bool(*g_can_run_function)(); +uint32_t *g_instruction_count; + +// Used by the verifier +struct SavedState +{ + SavedState() { } + SavedState(const ARMul_State &state) + { + memcpy(regs, &state.Reg[0], sizeof(regs)); + memcpy(flags, &state.NFlag, sizeof(flags)); + t_flag = state.TFlag; + } + void CopyTo(ARMul_State &state) + { + memcpy(&state.Reg[0], regs, sizeof(regs)); + memcpy(&state.NFlag, flags, sizeof(flags)); + state.TFlag = t_flag; // restore the saved Thumb flag into the CPU state (was assigned the wrong way round) + } + void SwapWith(ARMul_State &state) + { + SavedState arm_state = state; + std::swap(*this, arm_state); + arm_state.CopyTo(state); + } + void Print() + { + LOG_ERROR(BinaryTranslator, "%08x %08x %08x %08x %08x %08x %08x %08x", + regs[0], regs[1], regs[2], regs[3], + regs[4], regs[5], regs[6], regs[7]); + LOG_ERROR(BinaryTranslator, "%08x %08x %08x %08x %08x %08x %08x %08x", + regs[8], regs[9], regs[10], regs[11], + regs[12], regs[13], regs[14], regs[15]); + LOG_ERROR(BinaryTranslator, "%01x %01x %01x %01x %01x", flags[0], flags[1], flags[2], flags[3], t_flag); + } + + u32 regs[16]; + u32 flags[4]; + u32 t_flag; +}; + +bool operator==(const SavedState& lhs, const SavedState& rhs) +{ + return memcmp(&lhs, &rhs, sizeof(lhs)) == 0; +} + +bool operator!=(const SavedState& lhs, const SavedState& rhs) +{ + return memcmp(&lhs, &rhs, sizeof(lhs)) != 0; +} + +bool g_have_saved_state; // Whether there is a copied state +SavedState g_state_copy; +SavedState g_state_copy_before; + +void BinaryTranslationLoader::Load(FileUtil::IOFile& file) +{ + if (offsetof(ARMul_State, ZFlag) - offsetof(ARMul_State, NFlag) != 4 || + offsetof(ARMul_State, CFlag) - offsetof(ARMul_State, NFlag) != 8 || + offsetof(ARMul_State, VFlag) - offsetof(ARMul_State, NFlag) != 12) + { + 
LOG_WARNING(Loader, "Flags are unordered, cannot run optimized file"); + return; + } + + InitializeNativeTarget(); + + g_memory_manager = make_unique(); + + g_dyld = make_unique(*g_memory_manager.get(), *g_memory_manager.get()); + + auto size = file.GetSize(); + auto buffer = make_unique(size); + + file.ReadBytes(buffer.get(), size); + if (!file.IsGood()) + { + LOG_WARNING(Loader, "Cannot read optimized file"); + return; + } + + auto object_file = object::ObjectFile::createObjectFile(MemoryBufferRef(StringRef(buffer.get(), size), "")); + if (!object_file) + { + LOG_WARNING(Loader, "Cannot load optimized file"); + return; + } + + g_loaded_object_info = g_dyld->loadObject(*object_file->get()); + if (g_dyld->hasError()) + { + LOG_WARNING(Loader, "Cannot load optimized file, error %s", g_dyld->getErrorString().str().c_str()); + return; + } + + g_dyld->resolveRelocations(); + g_dyld->registerEHFrames(); + g_memory_manager->finalizeMemory(); + + g_run_function = static_cast((void*)g_dyld->getSymbol("Run").getAddress()); + g_can_run_function = static_cast((void*)g_dyld->getSymbol("CanRun").getAddress()); + auto verify_ptr = static_cast((void*)g_dyld->getSymbol("Verify").getAddress()); + g_instruction_count = static_cast((void*)g_dyld->getSymbol("InstructionCount").getAddress()); + auto memory_read_32_ptr = static_cast((void*)g_dyld->getSymbol("Memory::Read32").getAddress()); + auto memory_write_32_ptr = static_cast((void*)g_dyld->getSymbol("Memory::Write32").getAddress()); + + if (!g_run_function || !g_can_run_function || !verify_ptr || !g_instruction_count || !memory_read_32_ptr || !memory_write_32_ptr) + { + LOG_WARNING(Loader, "Cannot load optimized file, missing critical function"); + return; + } + + g_verify = *verify_ptr; + *memory_read_32_ptr = &Memory::Read32; + *memory_write_32_ptr = &Memory::Write32; + + g_enabled = true; + + LOG_INFO(Loader, "Binary translation enabled"); +} + +void BinaryTranslationLoader::SetCpuState(ARMul_State* state) +{ + if (!g_enabled) 
return; + + auto regs_ptr = static_cast((void*)g_dyld->getSymbol("Registers").getAddress()); + auto flags_ptr = static_cast((void*)g_dyld->getSymbol("Flags").getAddress()); + + *regs_ptr = &state->Reg[0]; + *flags_ptr = &state->NFlag; + g_have_saved_state = false; + g_state = state; +} + +bool BinaryTranslationLoader::CanRun(bool specific_address) +{ + if (!g_enabled) return false; + // Thumb not implemented + if (g_state->TFlag) return false; + if (specific_address) + if (!g_can_run_function()) return false; + return true; +} + +bool BinaryTranslationLoader::CanRun(u32 pc, bool tflag) +{ + if (!g_enabled) return false; + if (tflag) return false; + std::swap(g_state->Reg[15], pc); + auto result = g_can_run_function(); + std::swap(g_state->Reg[15], pc); + return result; +} + +uint32_t BinaryTranslationLoader::Run(uint32_t instruction_count) +{ + // No need to check the PC, Run does it anyway + if (!CanRun(false)) return instruction_count; + // If verify is enabled, it will run opcodes + if (g_verify) return instruction_count; + + return RunInternal(instruction_count); +} + +uint32_t BinaryTranslationLoader::RunInternal(uint32_t instruction_count) +{ + *g_instruction_count = instruction_count; + g_run_function(); + + g_state->TFlag = g_state->Reg[15] & 1; + if (g_state->TFlag) + g_state->Reg[15] &= 0xfffffffe; + else + g_state->Reg[15] &= 0xfffffffc; + + return *g_instruction_count; +} + +void Swap(void *a, void *b, size_t size) +{ + auto a_char = (char *)a; + auto b_char = (char *)b; + for (auto i = 0; i < size; ++i) + { + std::swap(a_char[i], b_char[i]); + } +} + +void BinaryTranslationLoader::VerifyCallback() +{ + if (!g_enabled || !g_verify) return; + + // Swap the PC and TFlag to the old state before checking if it can run + std::swap(g_state_copy.regs[15], g_state->Reg[15]); + std::swap(g_state_copy.t_flag, g_state->TFlag); + auto can_run = CanRun(true); + std::swap(g_state_copy.regs[15], g_state->Reg[15]); + std::swap(g_state_copy.t_flag, g_state->TFlag); + + 
if (g_have_saved_state && can_run) + { + // An opcode is finished, simulate it + + // Copy the state before + g_state_copy_before = g_state_copy; + + // Swap to the state before the opcode + g_state_copy.SwapWith(*g_state); + + // Run the opcode + RunInternal(0); + + // Test + auto current_as_saved_state = SavedState(*g_state); + if (current_as_saved_state != g_state_copy) + { + LOG_ERROR(BinaryTranslator, "Verify failed"); + LOG_ERROR(BinaryTranslator, "Regs Before"); + g_state_copy_before.Print(); + LOG_ERROR(BinaryTranslator, "Regs OK"); + g_state_copy.Print(); + LOG_ERROR(BinaryTranslator, "Regs not OK"); + current_as_saved_state.Print(); + + // Don't spam + g_enabled = false; + + // Make sure it has a valid state to continue to run + g_state_copy.CopyTo(*g_state); + } + } + else + { + // If this opcode is not translated or there is no saved state, just save the state and continue + g_state_copy = *g_state; + + g_have_saved_state = true; + } +} \ No newline at end of file diff --git a/src/core/binary_translation/BinaryTranslationLoader.h b/src/core/binary_translation/BinaryTranslationLoader.h new file mode 100644 index 000000000..b1a33a7a5 --- /dev/null +++ b/src/core/binary_translation/BinaryTranslationLoader.h @@ -0,0 +1,23 @@ +#pragma once +#include "common/file_util.h" +#include + +struct ARMul_State; + +class BinaryTranslationLoader +{ +public: + static void Load(FileUtil::IOFile& file); + static void SetCpuState(ARMul_State *state); + // Checks whether the cpu state can be run + // If specific_address, checks the specific PC too + static bool CanRun(bool specific_address); + // Checks whether the cpu state can run the specific address at the specific mode + static bool CanRun(u32 pc, bool tflag); + // Runs the state provided at SetCpuState. 
+ // Returns instruction_count + number of instructions executed + static uint32_t Run(uint32_t instruction_count); + // Link between Run and VerifyCallback + static uint32_t RunInternal(uint32_t instruction_count); + static void VerifyCallback(); +}; \ No newline at end of file diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 8eed6a50a..3b0b6a8c8 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp @@ -14,6 +14,7 @@ #include "core/loader/elf.h" #include "core/loader/ncch.h" #include "core/memory.h" +#include "core/loader/loader.h" #include "3dsx.h" @@ -226,6 +227,11 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, Shared code_set->entrypoint = code_set->code.addr; code_set->memory = std::make_shared>(std::move(program_image)); + Loader::ROMCodeStart = code_set->code.addr; + Loader::ROMCodeSize = code_set->code.size; + Loader::ROMReadOnlyDataStart = code_set->rodata.addr; + Loader::ROMReadOnlyDataSize = code_set->rodata.size; + LOG_DEBUG(Loader, "code size: 0x%X", loadinfo.seg_sizes[0]); LOG_DEBUG(Loader, "rodata size: 0x%X", loadinfo.seg_sizes[1]); LOG_DEBUG(Loader, "data size: 0x%X (including 0x%X of bss)", loadinfo.seg_sizes[2], hdr.bss_size); diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index 6b88169e1..7ebbb2b7e 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp @@ -15,6 +15,9 @@ #include "core/loader/3dsx.h" #include "core/loader/elf.h" #include "core/loader/ncch.h" +#if ENABLE_BINARY_TRANSLATION +#include "core/binary_translation/BinaryTranslationLoader.h" +#endif //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -26,6 +29,11 @@ const std::initializer_list default_address_mappings = { { 0x1F000000, 0x600000, false }, // entire VRAM }; +u32 ROMCodeStart; +u32 ROMCodeSize; +u32 ROMReadOnlyDataStart; +u32 ROMReadOnlyDataSize; + FileType IdentifyFile(FileUtil::IOFile& file) { FileType type; @@ -92,6 
+100,10 @@ const char* GetFileTypeString(FileType type) { } ResultStatus LoadFile(const std::string& filename) { + ROMCodeStart = 0; + ROMCodeSize = 0; + ROMReadOnlyDataStart = 0; + ROMReadOnlyDataSize = 0; FileUtil::IOFile file(filename, "rb"); if (!file.IsOpen()) { LOG_ERROR(Loader, "Failed to load file %s", filename.c_str()); @@ -112,6 +124,7 @@ ResultStatus LoadFile(const std::string& filename) { LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type)); + ResultStatus status = ResultStatus::Error; switch (type) { //3DSX file format... @@ -121,14 +134,15 @@ ResultStatus LoadFile(const std::string& filename) { // Load application and RomFS if (ResultStatus::Success == app_loader.Load()) { Service::FS::RegisterArchiveType(Common::make_unique(app_loader), Service::FS::ArchiveIdCode::RomFS); - return ResultStatus::Success; + status = ResultStatus::Success; } break; } // Standard ELF file format... case FileType::ELF: - return AppLoader_ELF(std::move(file), filename_filename).Load(); + status = AppLoader_ELF(std::move(file), filename_filename).Load(); + break; // NCCH/NCSD container formats... case FileType::CXI: @@ -139,14 +153,15 @@ ResultStatus LoadFile(const std::string& filename) { // Load application and RomFS if (ResultStatus::Success == app_loader.Load()) { Service::FS::RegisterArchiveType(Common::make_unique(app_loader), Service::FS::ArchiveIdCode::RomFS); - return ResultStatus::Success; + status = ResultStatus::Success; } break; } // CIA file format... case FileType::CIA: - return ResultStatus::ErrorNotImplemented; + status = ResultStatus::ErrorNotImplemented; + break; // Error occurred durring IdentifyFile... 
case FileType::Error: @@ -155,10 +170,24 @@ ResultStatus LoadFile(const std::string& filename) { case FileType::Unknown: { LOG_CRITICAL(Loader, "File %s is of unknown type.", filename.c_str()); - return ResultStatus::ErrorInvalidFormat; + status = ResultStatus::ErrorInvalidFormat; } } - return ResultStatus::Error; + #if ENABLE_BINARY_TRANSLATION + if (status == ResultStatus::Success) + { + std::unique_ptr optimized_file(new FileUtil::IOFile(filename + ".obj", "rb")); + if (!optimized_file->IsOpen()) + { + LOG_WARNING(Loader, "Failed to load optimized file %s.obj", filename.c_str()); + } + else + { + BinaryTranslationLoader::Load(*optimized_file); + } + } + #endif + return status; } } // namespace Loader diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index a7f2715ba..3ea859e3a 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -156,4 +156,12 @@ extern const std::initializer_list default_address_mappi */ ResultStatus LoadFile(const std::string& filename); +/* + * Infomation about ROM + */ +extern u32 ROMCodeStart; +extern u32 ROMCodeSize; +extern u32 ROMReadOnlyDataStart; +extern u32 ROMReadOnlyDataSize; + } // namespace diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 68b3f546e..ae4bd039f 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -165,6 +165,10 @@ ResultStatus AppLoader_NCCH::LoadExec() { s32 priority = exheader_header.arm11_system_local_caps.priority; u32 stack_size = exheader_header.codeset_info.stack_size; Kernel::g_current_process->Run(priority, stack_size); + Loader::ROMCodeStart = exheader_header.codeset_info.text.address; + Loader::ROMCodeSize = exheader_header.codeset_info.text.num_max_pages * Memory::PAGE_SIZE; + Loader::ROMReadOnlyDataStart = exheader_header.codeset_info.ro.address; + Loader::ROMReadOnlyDataSize = exheader_header.codeset_info.ro.num_max_pages * Memory::PAGE_SIZE; return ResultStatus::Success; } return ResultStatus::Error; diff --git 
a/src/core/mem_map.h b/src/core/mem_map.h new file mode 100644 index 000000000..500b409e0 --- /dev/null +++ b/src/core/mem_map.h @@ -0,0 +1 @@ +#include "memory.h" \ No newline at end of file