#include "arm/thumb.hpp"
#include "arm/arm_meta.hpp"
#include "arm/processor_default.hpp"
#include "arm/processor_interpreter.hpp"
#include "os.hpp"

// NOTE(review): the header names of the following ten directives are absent
// from this text (as are several template argument lists further down) -
// TODO restore them from the original file.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

using BitField::v1::ViewBitField;

void show_backtrace();

namespace Interpreter {

// Stores the given OS and Setup pointers; performs no further work.
CPUContext::CPUContext(HLE::OS::OS* os, Setup* setup) : os(os), setup(setup)/*, cfl(std::make_unique())*/ {
}

// CPUContext::CPUContext(const CPUContext& oth) : cfl(std::make_unique()),
//                                                 cpu{}, os(oth.os), setup(oth.setup)
// {}

CPUContext::~CPUContext() = default;

// Registers the new context in the parent processor's context list.
ExecutionContext::ExecutionContext(Processor& parent_) : parent(parent_) {
    parent.contexts.push_back(this);
}

// Asks the parent processor to unregister this context again.
ExecutionContext::~ExecutionContext() {
    parent.UnregisterContext(*this);
}

// Thin wrapper forwarding to Memory::WriteLegacy.
// NOTE(review): template parameter list / explicit template arguments appear to be missing here.
template static void WritePhysicalMemory(Memory::PhysicalMemory& mem, uint32_t address, const T value) {
    Memory::WriteLegacy(mem, address, value);
}

// Thin wrapper forwarding to Memory::ReadLegacy.
// NOTE(review): template parameter list / explicit template arguments appear to be missing here.
template static const T ReadPhysicalMemory(Memory::PhysicalMemory& mem, uint32_t address) {
    return Memory::ReadLegacy(mem, address);
}

// Initializes the members from the given arguments and then sets up both
// entries of "cpus": the CPU state is zeroed, the entry index is written to
// the CP15 CPU ID field, the mode is set to Supervisor, and the entry is
// pointed back at this Setup.
// NOTE(review): the pointee type of the std::unique_ptr parameter is missing in this text.
Setup::Setup(LogManager& log_manager, const KeyDatabase& keydb_, std::unique_ptr gamecard_, Profiler::Profiler& profiler, Debugger::DebugServer& debug_server)
    : mem(log_manager), keydb(keydb_), gamecard(std::move(gamecard_)), profiler(profiler), debug_server(debug_server) {
    for (auto i : {0,1}) {
        std::memset(&cpus[i].cpu, 0, sizeof(cpus[i].cpu));
        cpus[i].cpu.cp15.CPUId().CPUID = i;
        cpus[i].cpu.cpsr.mode = ARM::InternalProcessorMode::Supervisor;
        // "os" is not a constructor parameter, so this reads a member that the
        // initializer list above does not set - presumably still empty here; verify.
        cpus[i].os = os.get();
        cpus[i].setup = this;
    }
}

Setup::~Setup() = default;

// Earlier, self-contained draft of ControlFlowLogger (kept for reference):
// struct ControlFlowLogger {
//     uint32_t indent = 0;
//
//     void Branch(CPUContext& ctx, const char* kind, uint32_t addr) {
//         MakeSureFileIsOpen(ctx);
//         os << fmt::format("{}-> {} {:#x}", GetIndent(), kind, addr) << std::endl;
//         ++indent;
//     }
//
//     void Return(CPUContext& ctx, const char* kind) {
//         MakeSureFileIsOpen(ctx);
//         --indent;
//         os << fmt::format("{}<- ({})", GetIndent(), kind) << std::endl;
//     }
//
// private: // std::string GetIndent() { // return std::string(3 * indent, ' '); // } // // void MakeSureFileIsOpen(CPUContext& ctx) { // if (os) // return; // // auto filename = fmt::format("./cfl_{}_{}.txt", ctx.os->active_thread->GetParentProcess().GetId(), ctx.os->active_thread->GetId()); // os.open(filename); // } // // std::ofstream os; // }; // #define CONTROL_FLOW_LOGGING 1 void ControlFlowLogger::Branch(CPUContext& ctx, const char* kind, uint32_t addr) { #ifdef CONTROL_FLOW_LOGGING MakeSureFileIsOpen(ctx); if (indent > 50) { os << fmt::format("====== CUT") << std::endl; indent = 0; } os << fmt::format("{}-> {} {:#x}", GetIndent(), kind, addr) << std::endl; ++indent; #endif } void ControlFlowLogger::Return(CPUContext& ctx, const char* kind) { #ifdef CONTROL_FLOW_LOGGING MakeSureFileIsOpen(ctx); if (indent) { --indent; os << fmt::format("{}<- ({}) from {:#x}", GetIndent(), kind, ctx.cpu.reg[15]) << std::endl; } else { os << fmt::format("====== ({}) from {:#x}", kind, ctx.cpu.reg[15]) << std::endl; } #endif } void ControlFlowLogger::SVC(CPUContext& ctx, uint32_t id) { #ifdef CONTROL_FLOW_LOGGING MakeSureFileIsOpen(ctx); os << fmt::format("{}svc {:#x}", GetIndent(), id) << std::endl; #endif } void ControlFlowLogger::Log(CPUContext& ctx, const std::string& str) { #ifdef CONTROL_FLOW_LOGGING MakeSureFileIsOpen(ctx); os << fmt::format("{}{}", GetIndent(), str) << std::endl; #endif } std::string ControlFlowLogger::GetIndent() { // TODO: Just store this string internally instead of creating it over and over again... 
return std::string(3 * indent, ' '); } void ControlFlowLogger::MakeSureFileIsOpen(CPUContext& ctx) { #ifdef CONTROL_FLOW_LOGGING if (os) return; auto filename = fmt::format("./cfl_{}_{}.txt", ctx.os->active_thread->GetParentProcess().GetId(), ctx.os->active_thread->GetId()); os.open(filename); #endif } template /*[[deprecated]]*/ static T ReadVirtualMemory(InterpreterExecutionContext& ctx, uint32_t address) { return ctx.ReadVirtualMemory(address); } template /*[[deprecated]]*/ static void WriteVirtualMemory(InterpreterExecutionContext& ctx, uint32_t address, T value) { return ctx.WriteVirtualMemory(address, value); } using InterpreterARMHandler = std::add_pointer::type; static uint32_t HandlerStubWithMessage(CPUContext& ctx, ARM::ARMInstr instr, const std::string& message) { std::string error = fmt::format("Unknown instruction {:#010x} (PC is {:#x})", instr.raw, ctx.cpu.PC()); if (!message.empty()) error += ": " + message; error += '\n'; throw std::runtime_error(error); } static uint32_t HandlerStub(CPUContext& ctx, ARM::ARMInstr instr) { return HandlerStubWithMessage(ctx, instr, ""); } static uint32_t HandlerStubAnnotated(CPUContext& ctx, ARM::ARMInstr instr, unsigned line) { return HandlerStubWithMessage(ctx, instr, "(at line " + std::to_string(line) + ")"); } static void Link(CPUContext& ctx) { ctx.cpu.LR() = ctx.cpu.PC() + 4; } static uint32_t NextInstr(CPUContext& ctx) { return ctx.cpu.PC() + 4; } static uint32_t HandlerSkip(CPUContext& ctx, ARM::ARMInstr instr, const std::string& message) { // std::cerr << "Skipping instruction 0x" << std::hex << std::setw(8) << std::setfill('0') << instr.raw; // if (!message.empty()) // std::cerr << ": " << message; // std::cerr << std::endl; return NextInstr(ctx); } // Copies the MSB of the given value to the CPSR N flag static void UpdateCPSR_N(CPUContext& ctx, uint32_t val) { ctx.cpu.cpsr.neg = (val >> 31); } // Updates the CPSR Z flag with the contents of the given value (sets the flag if the value is zero, unsets it 
otherwise) static void UpdateCPSR_Z(CPUContext& ctx, uint32_t val) { ctx.cpu.cpsr.zero = (val == 0); } static void UpdateCPSR_C(CPUContext& ctx, bool val) { ctx.cpu.cpsr.carry = val; } static bool GetCarry(uint32_t left, uint32_t right) { return ((left >> 31) + (right >> 31) > ((left+right) >> 31)); } static bool GetCarry(uint32_t left, uint32_t right, uint32_t cpsr_c) { return ((left >> 31) + (right >> 31) > ((left+right+cpsr_c) >> 31)); } // TODO: Unify this with GetCarry! template static bool GetCarryT(T left, T right) { static_assert(std::is_unsigned::value, "Given type must be unsigned!"); using sign_bit = std::integral_constant; return ((left >> sign_bit::value) + (right >> sign_bit::value) > (static_cast(left+right) >> sign_bit::value)); } // TODO: Unify this with GetCarry! template static bool GetCarryT(T left, T right, T cpsr_c) { static_assert(std::is_unsigned::value, "Given type must be unsigned!"); using sign_bit = std::integral_constant; return ((left >> sign_bit::value) + (right >> sign_bit::value) > (static_cast(left+right+cpsr_c) >> sign_bit::value)); } static void UpdateCPSR_C_FromCarry(CPUContext& ctx, uint32_t left, uint32_t right) { ctx.cpu.cpsr.carry = GetCarry(left, right); } static void UpdateCPSR_C_FromCarry(CPUContext& ctx, uint32_t left, uint32_t right, uint32_t cpsr_c) { ctx.cpu.cpsr.carry = GetCarry(left, right, cpsr_c); } static void UpdateCPSR_C_FromBorrow(CPUContext& ctx, uint32_t left, uint32_t right) { bool borrow = left < right; ctx.cpu.cpsr.carry = !borrow; } static void UpdateCPSR_C_FromBorrow(CPUContext& ctx, uint32_t left, uint32_t right, uint32_t cpsr_c) { bool borrow = left < (static_cast(right) + !cpsr_c); ctx.cpu.cpsr.carry = !borrow; } static bool GetOverflowFromAdd(uint32_t left, uint32_t right, uint32_t result) { return (~(left ^ right) & (left ^ result) & (right ^ result)) >> 31; } static void UpdateCPSR_V_FromAdd(CPUContext& ctx, uint32_t left, uint32_t right, uint32_t result) { // TODO: Not sure if this works fine for 
computations including the cpsr.carry! ctx.cpu.cpsr.overflow = GetOverflowFromAdd(left, right, result); } static void UpdateCPSR_V_FromSub(CPUContext& ctx, uint32_t left, uint32_t right, uint32_t result) { ctx.cpu.cpsr.overflow = ((left ^ right) & (left ^ result)) >> 31; } static void UpdateCPSR_V_FromSub(CPUContext& ctx, uint32_t left, uint32_t right, uint32_t result, uint32_t carry) { right = ~right; // TODO: Portability! uint64_t signed_sum = static_cast(static_cast(left)) + static_cast(static_cast(right)) + static_cast(carry); ctx.cpu.cpsr.overflow = static_cast(static_cast(result)) != signed_sum; } // Evaluates the given condition based on CPSR static bool EvalCond(CPUContext& ctx, uint32_t cond) { if (cond == 0xE/* || cond == 0xF*/) { // always (0xF apparently is never?) return true; } else if (cond == 0x0) { // Equal return (ctx.cpu.cpsr.zero == 1); } else if (cond == 0x1) { // Not Equal return (ctx.cpu.cpsr.zero == 0); } else if (cond == 0x2) { // Greater Equal (unsigned) return (ctx.cpu.cpsr.carry == 1); } else if (cond == 0x3) { // Less Than (unsigned) return (ctx.cpu.cpsr.carry == 0); } else if (cond == 0x4) { // Negative return (ctx.cpu.cpsr.neg == 1); } else if (cond == 0x5) { // Positive or Zero return (ctx.cpu.cpsr.neg == 0); } else if (cond == 0x6) { // Overflow return (ctx.cpu.cpsr.overflow == 1); } else if (cond == 0x7) { // No overflow return (ctx.cpu.cpsr.overflow == 0); } else if (cond == 0x8) { // Greater (unsigned) return (ctx.cpu.cpsr.carry == 1 && ctx.cpu.cpsr.zero == 0); } else if (cond == 0x9) { // Less Equal (unsigned) return (ctx.cpu.cpsr.carry == 0 || ctx.cpu.cpsr.zero == 1); } else if (cond == 0xa) { // Greater Equal (signed) return (ctx.cpu.cpsr.neg == ctx.cpu.cpsr.overflow); } else if (cond == 0xb) { // Less Than (signed) return (ctx.cpu.cpsr.neg != ctx.cpu.cpsr.overflow); } else if (cond == 0xc) { // Greater Than (signed) return (ctx.cpu.cpsr.zero == 0 && ctx.cpu.cpsr.neg == ctx.cpu.cpsr.overflow); } else if (cond == 0xd) { // Less 
Equal (signed) return (ctx.cpu.cpsr.zero == 1 || ctx.cpu.cpsr.neg != ctx.cpu.cpsr.overflow); } throw std::runtime_error("Condition not implemented"); } void CPUContext::RecordCall(uint32_t source, uint32_t target, ARM::State state) { // Callsite entry; // entry.source = source; // entry.target = target; // // TODO: Fix ARM::State to allow a plain copy here! // memcpy(&entry.state, &state, sizeof(state)); // entry.state.reg[15] = target; // backtrace.push_back(entry); } template static uint32_t HandlerBranch(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (link) Link(ctx); uint32_t target = ctx.cpu.PC() + 8 + 4 * instr.branch_target; if (link) { ctx.RecordCall(ctx.cpu.PC(), target, ctx.cpu); ctx.cfl.Branch(ctx, "bl", target); } return target; } template static uint32_t HandlerBranchExchange(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if ((instr.identifier_4_23 & ~0b10) != 0b0010'1111'1111'1111'0001) return HandlerStub(ctx, instr); if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // The link register may be used as the target specifier, hence store // the register value before linking. 
auto target = ctx.cpu.reg[instr.idx_rm]; if (instr.idx_rm == 14) { ctx.cfl.Return(ctx, "bx lr"); } if (link) Link(ctx); ctx.cpu.cpsr.thumb = (target & 1); if (link) { ctx.RecordCall(ctx.cpu.PC(), target & ~UINT32_C(1), ctx.cpu); ctx.cfl.Branch(ctx, "blx", target); } return target & ~UINT32_C(1); } static uint32_t HandlerCPS(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Abort early for Unpredictable configurations if (instr.cps_imod_enable == 0 && instr.cps_imod_value == 0 && instr.cps_mmod == 0) return HandlerStubAnnotated(ctx, instr, __LINE__); if (instr.cps_imod_enable == 0 && instr.cps_imod_value == 1) return HandlerStubAnnotated(ctx, instr, __LINE__); if (!ctx.cpu.cpsr.InPrivilegedMode()) return NextInstr(ctx); if (instr.cps_imod_enable) { if (instr.cps_A) ctx.cpu.cpsr.A = instr.cps_imod_value; if (instr.cps_I) ctx.cpu.cpsr.I = instr.cps_imod_value; if (instr.cps_F) ctx.cpu.cpsr.F = instr.cps_imod_value; } if (instr.cps_mmod) { ctx.cpu.cpsr.mode = MakeInternal(instr.cps_mode.Value()); // TODO: Changing to an reserved system mode is Unpredictable } return NextInstr(ctx); } static uint32_t RotateRight(uint32_t value, uint32_t bits) { // NOTE: shifting by 32 bits is undefined behavior, hence we add a special // case for these values here. if (bits == 0 || bits == 32) return value; else return (value >> bits) | (value << (32 - bits)); } struct ShifterOperand { uint32_t value; bool carry_out; }; template static uint32_t ArithmeticShiftRight(T val, uint32_t bits) = delete; /** * Preconditions: * - 0 < bits < 32 */ static uint32_t ArithmeticShiftRight(uint32_t val, uint32_t bits) { uint32_t msb = val >> 31; return (val >> bits) | (msb * (0xFFFFFFFF << (32 - bits))); } // NOTE: For immediate shifts, use CalcShifterOperandFromImmediate instead! // NOTE: ROR_RRX is always executed as a rotate in this function. 
// NOTE: Only the least significant 8 bits of shift_value are considered static std::optional CalcShifterOperand(uint32_t value, uint32_t shift_value, ARM::OperandShifterMode mode, bool carry) { ShifterOperand ret; // Mask out upper bits shift_value &= 0xFF; switch (mode) { case ARM::OperandShifterMode::LSL: ret.value = shift_value < 32 ? (value << shift_value) : 0; ret.carry_out = shift_value == 0 ? carry : shift_value <= 32 ? ((value << (shift_value - 1)) >> 31) : 0; return ret; case ARM::OperandShifterMode::LSR: ret.value = shift_value < 32 ? (value >> shift_value) : 0; ret.carry_out = shift_value == 0 ? carry : shift_value <= 32 ? ((value >> (shift_value - 1)) & 1) : 0; return ret; case ARM::OperandShifterMode::ASR: ret.value = shift_value == 0 ? value : shift_value <= 31 ? ArithmeticShiftRight(value, shift_value) : (0xFFFFFFFF * (value >> 31)); ret.carry_out = shift_value == 0 ? carry : shift_value < 32 ? (ArithmeticShiftRight(value, shift_value - 1) & 1) : (value >> 31); return ret; case ARM::OperandShifterMode::ROR_RRX: // This mode only considers the least significant 5 bits in shift_value ret.value = (shift_value & 0x1F) == 0 ? value : RotateRight(value, shift_value & 0x1F); ret.carry_out = shift_value == 0 ? carry : (shift_value & 0x1F) == 0 ? (value >> 31) : (RotateRight(value, (shift_value & 0x1F) - 1) & 1); return ret; default: return {}; } return ret; } static std::optional CalcShifterOperandFromImmediate(uint32_t value, uint32_t shift_value, ARM::OperandShifterMode mode, bool carry) { switch (mode) { case ARM::OperandShifterMode::LSL: return CalcShifterOperand(value, shift_value, mode, carry); case ARM::OperandShifterMode::LSR: case ARM::OperandShifterMode::ASR: return CalcShifterOperand(value, shift_value ? 
shift_value : 32, mode, carry); case ARM::OperandShifterMode::ROR_RRX: if (shift_value != 0) { return CalcShifterOperand(value, shift_value, mode, carry); } else { // Rotate Right with Extend by 33 bits with C as the 33rd bit ShifterOperand ret; ret.value = ((uint32_t)carry << 31) | (value >> 1); ret.carry_out = value & 1; return ret; } default: return {}; } } static std::optional GetAddr1ShifterOperand(CPUContext& ctx, ARM::ARMInstr instr) { switch (ARM::GetAddrMode1Encoding(instr)) { case ARM::AddrMode1Encoding::Imm: { // Rotate immediate by an even amount of bits auto result = RotateRight(instr.immed_8, 2 * instr.rotate_imm); bool carry_out = instr.rotate_imm ? (result >> 31) : ctx.cpu.cpsr.carry.Value(); return { {result, carry_out} }; } case ARM::AddrMode1Encoding::ShiftByImm: { auto reg = ctx.cpu.FetchReg(instr.idx_rm); return CalcShifterOperandFromImmediate(reg, instr.addr1_shift_imm, instr.addr1_shift, ctx.cpu.cpsr.carry); } case ARM::AddrMode1Encoding::ShiftByReg: { // NOTE: Chosing R15 for Rd, Rm, Rn, or Rs has Unpredictable results. 
auto reg = ctx.cpu.FetchReg(instr.idx_rm); return CalcShifterOperand(reg, ctx.cpu.FetchReg(instr.idx_rs), instr.addr1_shift, ctx.cpu.cpsr.carry); } default: return {}; } } static uint32_t HandlerMov(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.addr1_S && instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); ctx.cpu.reg[instr.idx_rd] = shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected } if (instr.idx_rd == ARM::Regs::PC) { return ctx.cpu.PC(); } else { return NextInstr(ctx); } } // Move Not static uint32_t HandlerMvn(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); ctx.cpu.reg[instr.idx_rd] = ~shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected } return NextInstr(ctx); } // Bit Clear static uint32_t HandlerBic(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return 
HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); ctx.cpu.reg[instr.idx_rd] = ctx.cpu.reg[instr.idx_rn] & ~shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected } return NextInstr(ctx); } // Exclusive OR static uint32_t HandlerEor(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); ctx.cpu.reg[instr.idx_rd] = ctx.cpu.reg[instr.idx_rn] ^ shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected } return NextInstr(ctx); } static uint32_t HandlerMul(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); // sic: Rn defines the output here. ctx.cpu.reg[instr.idx_rn] = ctx.cpu.FetchReg(instr.idx_rm) * ctx.cpu.FetchReg(instr.idx_rs); if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rn]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rn]); // On ARMv4 and earlier, C is unpredictable, while on newer ISAs it's unaffected. 
// V unaffected } return NextInstr(ctx); } static uint32_t HandlerMla(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); // sic: Rn defines the output here, while Rd is an input ctx.cpu.reg[instr.idx_rn] = ctx.cpu.FetchReg(instr.idx_rm) * ctx.cpu.FetchReg(instr.idx_rs) + ctx.cpu.FetchReg(instr.idx_rd); if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rn]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rn]); // On ARMv4 and earlier, C is unpredictable, while on newer ISAs it's unaffected. // V unaffected } return NextInstr(ctx); } static uint32_t HandlerAnd(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); ctx.cpu.reg[instr.idx_rd] = ctx.cpu.FetchReg(instr.idx_rn) & shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected } return NextInstr(ctx); } // Logical OR static uint32_t HandlerOrr(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); ctx.cpu.reg[instr.idx_rd] = ctx.cpu.FetchReg(instr.idx_rn) | shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, 
ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected } return NextInstr(ctx); } // Test Equivalence static uint32_t HandlerTeq(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t alu_out = ctx.cpu.FetchReg(instr.idx_rn) ^ shifter_operand->value; UpdateCPSR_N(ctx, alu_out); UpdateCPSR_Z(ctx, alu_out); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected return NextInstr(ctx); } // Test static uint32_t HandlerTst(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t alu_out = ctx.cpu.FetchReg(instr.idx_rn) & shifter_operand->value; UpdateCPSR_N(ctx, alu_out); UpdateCPSR_Z(ctx, alu_out); UpdateCPSR_C(ctx, shifter_operand->carry_out); // V unaffected return NextInstr(ctx); } static uint32_t HandlerAdd(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.addr1_S && instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t rn = ctx.cpu.FetchReg(instr.idx_rn); ctx.cpu.reg[instr.idx_rd] = rn + 
shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C_FromCarry(ctx, rn, shifter_operand->value); UpdateCPSR_V_FromAdd(ctx, rn, shifter_operand->value, ctx.cpu.reg[instr.idx_rd]); } if (instr.idx_rd == ARM::Regs::PC) { return ctx.cpu.reg[ARM::Regs::PC]; } else { return NextInstr(ctx); } } static uint32_t HandlerAdc(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t rn = ctx.cpu.FetchReg(instr.idx_rn); ctx.cpu.reg[instr.idx_rd] = rn + shifter_operand->value + ctx.cpu.cpsr.carry; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C_FromCarry(ctx, rn, shifter_operand->value, ctx.cpu.cpsr.carry); UpdateCPSR_V_FromAdd(ctx, rn, shifter_operand->value, ctx.cpu.reg[instr.idx_rd]); } return NextInstr(ctx); } // Subtract with carry static uint32_t HandlerSbc(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t rn = ctx.cpu.FetchReg(instr.idx_rn); ctx.cpu.reg[instr.idx_rd] = rn - shifter_operand->value - !ctx.cpu.cpsr.carry; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_V_FromSub(ctx, rn, shifter_operand->value, ctx.cpu.reg[instr.idx_rd], 
ctx.cpu.cpsr.carry); UpdateCPSR_C_FromBorrow(ctx, rn, shifter_operand->value, ctx.cpu.cpsr.carry); } return NextInstr(ctx); } // Reverse Subtract with Carry static uint32_t HandlerRsc(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t rn = ctx.cpu.FetchReg(instr.idx_rn); ctx.cpu.reg[instr.idx_rd] = shifter_operand->value - rn - !ctx.cpu.cpsr.carry; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_V_FromSub(ctx, shifter_operand->value, rn, ctx.cpu.reg[instr.idx_rd], ctx.cpu.cpsr.carry); UpdateCPSR_C_FromBorrow(ctx, shifter_operand->value, rn, ctx.cpu.cpsr.carry); } return NextInstr(ctx); } static uint32_t HandlerSub(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t rn = ctx.cpu.FetchReg(instr.idx_rn); ctx.cpu.reg[instr.idx_rd] = rn - shifter_operand->value; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C_FromBorrow(ctx, rn, shifter_operand->value); UpdateCPSR_V_FromSub(ctx, rn, shifter_operand->value, ctx.cpu.reg[instr.idx_rd]); } return NextInstr(ctx); } // Reverse Subtract static uint32_t HandlerRsb(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == 
ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Using PC as Rd yields unpredictable behavior in some cases"); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t rn = ctx.cpu.FetchReg(instr.idx_rn); ctx.cpu.reg[instr.idx_rd] = shifter_operand->value - rn; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd]); UpdateCPSR_C_FromBorrow(ctx, shifter_operand->value, rn); UpdateCPSR_V_FromSub(ctx, shifter_operand->value, rn, ctx.cpu.reg[instr.idx_rd]); } return NextInstr(ctx); } template static uint32_t HandlerAddrMode3(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.idx_rd == 15) return HandlerStubWithMessage(ctx, instr, "Configuration not implemented"); if (!instr.ldr_P && instr.ldr_W) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration - maybe this isn't LDR/STR?"); // P=0: Memory access using base register; after the access, the base register has the offset applied to it (post-indexed addressing) // P=0, W=0: normal memory access using base register // P=0, W=1: unpredictable // P=1, W=0: memory access using base register with applied offset (base register remains unchanged). // P=1, W=1: memory access using base register with applied offset (base register will be updated). uint32_t address = ctx.cpu.reg[instr.idx_rn]; bool Imm = BitField::v1::ViewBitField<22, 1, uint32_t>(instr.raw); uint32_t offset = (instr.ldr_U ? 1 : -1) * (Imm ? ((instr.addr3_immed_hi << 4) | instr.addr3_immed_lo) : ctx.cpu.reg[instr.idx_rm]); if (instr.ldr_P) address += offset; // TODO: Need to take care of shared memory magic for store instructions! 
switch (AccessType) { case ARM::AddrMode3AccessType::LoadSignedByte: // Load with sign extend ctx.cpu.reg[instr.idx_rd] = (int8_t)ReadVirtualMemory(ctx, address); break; case ARM::AddrMode3AccessType::StoreByte: WriteVirtualMemory(ctx, address, ctx.cpu.reg[instr.idx_rd]); break; case ARM::AddrMode3AccessType::LoadSignedHalfword: // Load with sign extend // TODO: If CP15 is configured appropriately, bit0 of address may be non-zero // if ((address & 0x1) != 0) // return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); ctx.cpu.reg[instr.idx_rd] = (int16_t)ReadVirtualMemory(ctx, address); break; case ARM::AddrMode3AccessType::LoadUnsignedHalfword: // TODO: If CP15 is configured appropriately, bit0 of address may be non-zero // if ((address & 0x1) != 0) // return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); ctx.cpu.reg[instr.idx_rd] = ReadVirtualMemory(ctx, address); break; case ARM::AddrMode3AccessType::StoreHalfword: // TODO: If CP15 is configured appropriately, bit0 of address may be non-zero // NOTE: CP15 by default is configured appropriately to support this on the 3DS! 
//if ((address & 0x1) != 0) // return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); WriteVirtualMemory(ctx, address, ctx.cpu.reg[instr.idx_rd]); break; case ARM::AddrMode3AccessType::LoadDoubleword: // TODO: If CP15 is configured appropriately, bit2 of address may be non-zero if ((instr.idx_rd % 2) && instr.idx_rd != 14 && (address & 0x7) != 0) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); ctx.cpu.reg[instr.idx_rd] = ReadVirtualMemory(ctx, address); ctx.cpu.reg[instr.idx_rd+1] = ReadVirtualMemory(ctx, address+4); break; case ARM::AddrMode3AccessType::StoreDoubleword: // TODO: If CP15 is configured appropriately, bit2 of address may be non-zero if ((instr.idx_rd % 2) && instr.idx_rd != 14 && (address & 0x7) != 0) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); WriteVirtualMemory(ctx, address, ctx.cpu.reg[instr.idx_rd]); WriteVirtualMemory(ctx, address+4, ctx.cpu.reg[instr.idx_rd+1]); break; default: return HandlerStubWithMessage(ctx, instr, "Not an addressing mode 3 instruction - configuration not implemented"); } if (!instr.ldr_P) address += offset; // Update base register if necessary if (!instr.ldr_P || instr.ldr_W) { ctx.cpu.reg[instr.idx_rn] = address; if (instr.idx_rn == 15) { // TODO: Unknown behavior for PC return HandlerStubAnnotated(ctx, instr, __LINE__); } } return NextInstr(ctx); } // Signed Multiply static uint32_t HandlerSmulxx(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // If either of the operand registers are PC, abort - this is unpredictable behavior! if (instr.idx_rn == 15 || instr.idx_rs == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // TODO: These computations have a chance of overflowing, which is ill-defined in C++ code! 
// Extract lower or bottom 16 bit depending on the given instruction and then convert to a signed integer and sign-extend to 32-bit uint32_t input1_shift = 16 * ViewBitField<5,1,uint32_t>(instr.raw); int32_t input1 = static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rm) >> input1_shift)); bool input2_shift = ViewBitField<6,1,uint32_t>(instr.raw); int32_t input2 = static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rs) >> input2_shift)); // sic: indeed, rd is used as the input operand, while rn is the output operand // TODO: This multiplication may not be well-defined C++ due to signed overflow // NOTE: The result indeed is stored in Rn ctx.cpu.reg[instr.idx_rn] = input1 * input2; return NextInstr(ctx); } // Unsigned Multiply Accumulate Accumulate Long static uint32_t HandlerUmaal(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // If either of the destination registers are PC, abort - this is unpredictable behavior! if (instr.idx_rm == 15 || instr.idx_rs == 15 || instr.idx_rd == 15 || instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // If the destination registers are equal, abort - this is unpredictable behavior! if (instr.idx_rd == instr.idx_rn) return HandlerStubAnnotated(ctx, instr, __LINE__); // First compute the result using the original register values, then write it back uint64_t result = static_cast(ctx.cpu.FetchReg(instr.idx_rs)) * static_cast(ctx.cpu.FetchReg(instr.idx_rm)); result += ctx.cpu.reg[instr.idx_rd]; result += ctx.cpu.reg[instr.idx_rn]; ctx.cpu.reg[instr.idx_rd] = result & 0xFFFFFFFF; ctx.cpu.reg[instr.idx_rn] = result >> 32; return NextInstr(ctx); } // Signed Multiply Long static uint32_t HandlerSmull(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // If either of the destination registers are PC, abort - this is unpredictable behavior! 
if (instr.idx_rd == 15 || instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // NOTE: This is actually not unpredictable, apparently. if (instr.idx_rd == instr.idx_rn) return HandlerStubAnnotated(ctx, instr, __LINE__); // Cast from uint32_t to int32_t before casting to 64-bit to have proper sign-extension. uint64_t result = static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rs))) * static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rm))); ctx.cpu.reg[instr.idx_rd] = result & 0xFFFFFFFF; ctx.cpu.reg[instr.idx_rn] = result >> 32; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rn]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd] | ctx.cpu.reg[instr.idx_rn]); // C and V are unpredictable on ARMv4 and earlier (otherwise, they are unaffected) } return NextInstr(ctx); } // Unsigned Multiply Long static uint32_t HandlerUmull(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // If either of the destination registers are PC, abort - this is unpredictable behavior! if (instr.idx_rd == 15 || instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // NOTE: This is actually not unpredictable, apparently. if (instr.idx_rd == instr.idx_rn) return HandlerStubAnnotated(ctx, instr, __LINE__); uint64_t result = static_cast(ctx.cpu.FetchReg(instr.idx_rs)) * static_cast(ctx.cpu.FetchReg(instr.idx_rm)); ctx.cpu.reg[instr.idx_rd] = result & 0xFFFFFFFF; ctx.cpu.reg[instr.idx_rn] = result >> 32; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rn]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd] | ctx.cpu.reg[instr.idx_rn]); // C and V are unpredictable on ARMv4 and earlier (otherwise, they are unaffected) } return NextInstr(ctx); } // Signed Multiply Accumulate (16-bit) static uint32_t HandlerSmlaxx(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // If either of the operand registers are PC, abort - this is unpredictable behavior! 
if (instr.idx_rd == 15 || instr.idx_rn == 15 || instr.idx_rs == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // TODO: These computations have a chance of overflowing, which is ill-defined in C++ code! // Extract lower or bottom 16 bit depending on the given instruction and then convert to a signed integer and sign-extend to 32-bit uint32_t input1_shift = 16 * ViewBitField<5,1,uint32_t>(instr.raw); int32_t input1 = static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rm) >> input1_shift)); bool input2_shift = ViewBitField<6,1,uint32_t>(instr.raw); int32_t input2 = static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rs) >> input2_shift)); // sic: indeed, rd is used as the input operand, while rn is the output operand uint32_t result = input1 * input2 + static_cast(ctx.cpu.FetchReg(instr.idx_rd)); if (GetOverflowFromAdd(input1 * input2, static_cast(ctx.cpu.FetchReg(instr.idx_rd)), result)) ctx.cpu.cpsr.q = 1; ctx.cpu.reg[instr.idx_rn] = result; return NextInstr(ctx); } // Signed Multiply Accumulate Long static uint32_t HandlerSmlal(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // If either of the destination registers are PC, abort - this is unpredictable behavior! if (instr.idx_rd == 15 || instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // NOTE: This is actually not unpredictable, apparently. if (instr.idx_rd == instr.idx_rn) return HandlerStubAnnotated(ctx, instr, __LINE__); // Cast from uint32_t to int32_t before casting to 64-bit to have proper sign-extension. 
uint64_t result = static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rs))) * static_cast(static_cast(ctx.cpu.FetchReg(instr.idx_rm))); result += ctx.cpu.reg[instr.idx_rd]; // Accumulate low part result += static_cast(ctx.cpu.reg[instr.idx_rn]) << 32; // Accumulate high part ctx.cpu.reg[instr.idx_rd] = result & 0xFFFFFFFF; ctx.cpu.reg[instr.idx_rn] = result >> 32; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rn]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd] | ctx.cpu.reg[instr.idx_rn]); // C and V are unpredictable on ARMv4 and earlier (on later versions, they are unaffected) } return NextInstr(ctx); } // Unsigned Multiply Accumulate Long static uint32_t HandlerUmlal(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // If either of the destination registers are PC, abort - this is unpredictable behavior! if (instr.idx_rd == 15 || instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // If the destination registers are equal, abort - this is unpredictable behavior! if (instr.idx_rd == instr.idx_rn) return HandlerStubAnnotated(ctx, instr, __LINE__); // First compute the result, then add the prior uint64_t value of the destination registers, then store back the result. 
uint64_t result = static_cast(ctx.cpu.FetchReg(instr.idx_rs)) * static_cast(ctx.cpu.FetchReg(instr.idx_rm)); result += ctx.cpu.reg[instr.idx_rd]; result += static_cast(ctx.cpu.reg[instr.idx_rn]) << 32; ctx.cpu.reg[instr.idx_rd] = result & 0xFFFFFFFF; ctx.cpu.reg[instr.idx_rn] = result >> 32; if (instr.addr1_S) { UpdateCPSR_N(ctx, ctx.cpu.reg[instr.idx_rn]); UpdateCPSR_Z(ctx, ctx.cpu.reg[instr.idx_rd] | ctx.cpu.reg[instr.idx_rn]); // C and V are unpredictable on ARMv4 and earlier (otherwise, they are unaffected) } return NextInstr(ctx); } static uint32_t Handler00x0(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if ((instr.raw & 0b1111'1110'0000'0000'0000'1111'0000) == 0b0000'0010'0000'0000'0000'1001'0000) { return HandlerMla(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'0000'1111'0000) == 0b0000'0100'0000'0000'0000'1001'0000) { return HandlerUmaal(ctx, instr); } else if ((instr.raw & 0b1111'1110'0000'0000'0000'1111'0000) == 0b0000'1100'0000'0000'0000'1001'0000) { return HandlerSmull(ctx, instr); } else if ((instr.raw & 0b1111'1110'0000'0000'0000'1111'0000) == 0b0000'1110'0000'0000'0000'1001'0000) { return HandlerSmlal(ctx, instr); } else if ((instr.raw & 0b1111'1110'0000'0000'0000'1111'0000) == 0b0000'1000'0000'0000'0000'1001'0000) { return HandlerUmull(ctx, instr); } else if ((instr.raw & 0b1111'1110'0000'0000'0000'1111'0000) == 0b0000'1010'0000'0000'0000'1001'0000) { return HandlerUmlal(ctx, instr); } else { return HandlerStubAnnotated(ctx, instr, __LINE__); } } // Move *PSR to Register (CPSR if R=0, current SPSR otherwise) static uint32_t HandlerMRS(CPUContext& ctx, ARM::ARMInstr instr) { // Unpredictable in all circumstances if (instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); // Unpredictable if (instr.R == 1 && !ctx.cpu.HasSPSR()) return HandlerStubAnnotated(ctx, instr, __LINE__); if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); ctx.cpu.reg[instr.idx_rd] = instr.R ? 
ctx.cpu.GetSPSR(ctx.cpu.cpsr.mode).ToNativeRaw32() : ctx.cpu.cpsr.ToNativeRaw32(); return NextInstr(ctx); } // Move to *PSR from Register static uint32_t HandlerMSR(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable if (instr.R == 1 && !ctx.cpu.HasSPSR()) return HandlerStubAnnotated(ctx, instr, __LINE__); // TODO: Figure out if this may be used just like any other Addressing Mode 1 instruction! (e.g. applying different shift modes, etc) uint32_t operand = instr.msr_I ? RotateRight(instr.immed_8, 2 * instr.rotate_imm) : ctx.cpu.FetchReg(instr.idx_rm); // TODO: These are ARMv6-specific! const uint32_t UnallocMask = 0x06F0FC00; const uint32_t UserMask = 0xF80F0200; // writeable from any mode. N, Z, C, V, Q, G[3:0], E. const uint32_t PrivMask = 0x000001DF; // writeable from privileged modes. A, I, F, M[4:0] const uint32_t StateMask = 0x01000020; // writeable from privileged modes. ignores writes from user mode. // Unpredictable if (operand & UnallocMask) return HandlerStubAnnotated(ctx, instr, __LINE__); auto& spr = instr.R ? 
ctx.cpu.GetSPSR(ctx.cpu.cpsr.mode) : ctx.cpu.cpsr; auto spr_raw = spr.ToNativeRaw32(); uint32_t spr_mask = UserMask; if (instr.R == 0) { if (ctx.cpu.InPrivilegedMode()) { if (operand & StateMask) return HandlerStubAnnotated(ctx, instr, __LINE__); spr_mask |= PrivMask; } } else { spr_mask |= PrivMask | StateMask; } uint32_t mask = instr.ExpandMSRFieldMask() & spr_mask; auto spr_new = ARM::State::ProgramStatusRegister::FromNativeRaw32((spr_raw & ~mask) | (operand & mask)); if (!instr.R) { ctx.cpu.ReplaceCPSR(spr_new); } else { spr.RawCopyFrom(spr_new); } return NextInstr(ctx); } static uint32_t HandlerCmp(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t alu_out = ctx.cpu.reg[instr.idx_rn] - shifter_operand->value; UpdateCPSR_N(ctx, alu_out); UpdateCPSR_Z(ctx, alu_out); UpdateCPSR_C_FromBorrow(ctx, ctx.cpu.reg[instr.idx_rn], shifter_operand->value); UpdateCPSR_V_FromSub(ctx, ctx.cpu.reg[instr.idx_rn], shifter_operand->value, alu_out); return NextInstr(ctx); } static uint32_t HandlerCmn(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto shifter_operand = GetAddr1ShifterOperand(ctx, instr); if (!shifter_operand) return HandlerStubWithMessage(ctx, instr, "Unknown shifter operand format"); uint32_t alu_out = ctx.cpu.reg[instr.idx_rn] + shifter_operand->value; UpdateCPSR_N(ctx, alu_out); UpdateCPSR_Z(ctx, alu_out); UpdateCPSR_C_FromCarry(ctx, ctx.cpu.reg[instr.idx_rn], shifter_operand->value); UpdateCPSR_V_FromAdd(ctx, ctx.cpu.reg[instr.idx_rn], shifter_operand->value, alu_out); return NextInstr(ctx); } // TODOTEST: What granularity does the 3DS use? 
const uint32_t monitor_address_mask = 0xffffff8; static void ClearExclusive(InterpreterExecutionContext& ctx) { ctx.monitor_address = {}; } static void MarkExclusive(InterpreterExecutionContext& ctx, uint32_t new_address) { ctx.monitor_address = (new_address & monitor_address_mask); } // Returns true if the store can be performed static bool PrepareExclusiveStore(InterpreterExecutionContext& ctx, uint32_t addr) { if (!ctx.monitor_address) { return false; } if (*ctx.monitor_address != (addr & monitor_address_mask)) { throw Mikage::Exceptions::Invalid("STREX(B/H/D) to non-exclusive address is implementation defined"); } ctx.monitor_address = {}; return true; } static uint32_t HandlerLdrex(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto addr = ctx.cpu.FetchReg(instr.idx_rn); MarkExclusive(ctx, addr); ctx.cpu.reg[instr.idx_rd] = ReadVirtualMemory(ctx, addr); return NextInstr(ctx); } static uint32_t HandlerStrex(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto addr = ctx.cpu.FetchReg(instr.idx_rn); if (PrepareExclusiveStore(ctx, addr)) { WriteVirtualMemory(ctx, addr, ctx.cpu.FetchReg(instr.idx_rm)); ctx.cpu.reg[instr.idx_rd] = 0; } else { // Not in exclusive state => Not updating memory ctx.cpu.reg[instr.idx_rd] = 1; } return NextInstr(ctx); } static uint32_t HandlerLdrexb(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto addr = ctx.cpu.FetchReg(instr.idx_rn); MarkExclusive(ctx, addr); ctx.cpu.reg[instr.idx_rd] = ReadVirtualMemory(ctx, addr); return NextInstr(ctx); } static uint32_t HandlerStrexb(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto addr = ctx.cpu.FetchReg(instr.idx_rn); if (PrepareExclusiveStore(ctx, addr)) { WriteVirtualMemory(ctx, addr, ctx.cpu.FetchReg(instr.idx_rm)); 
ctx.cpu.reg[instr.idx_rd] = 0; } else { // Not in exclusive state => Not updating memory ctx.cpu.reg[instr.idx_rd] = 1; } return NextInstr(ctx); } static uint32_t HandlerLdrexh(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto addr = ctx.cpu.FetchReg(instr.idx_rn); MarkExclusive(ctx, addr); ctx.cpu.reg[instr.idx_rd] = ReadVirtualMemory(ctx, addr); return NextInstr(ctx); } static uint32_t HandlerStrexh(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); auto addr = ctx.cpu.FetchReg(instr.idx_rn); if (PrepareExclusiveStore(ctx, addr)) { WriteVirtualMemory(ctx, addr, ctx.cpu.FetchReg(instr.idx_rm)); ctx.cpu.reg[instr.idx_rd] = 0; } else { // Not in exclusive state => Not updating memory ctx.cpu.reg[instr.idx_rd] = 1; } return NextInstr(ctx); } static uint32_t HandlerLdrexd(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if ((instr.idx_rd % 2) != 0 || instr.idx_rd == ARM::Regs::LR || instr.idx_rn == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); auto addr = ctx.cpu.FetchReg(instr.idx_rn); MarkExclusive(ctx, addr); ctx.cpu.reg[instr.idx_rd ] = ReadVirtualMemory(ctx, addr ); ctx.cpu.reg[instr.idx_rd + 1] = ReadVirtualMemory(ctx, addr + 4); return NextInstr(ctx); } static uint32_t HandlerStrexd(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if ((instr.idx_rm % 2) != 0 || instr.idx_rm == ARM::Regs::LR || instr.idx_rn == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); if (instr.idx_rd == instr.idx_rn || instr.idx_rd == instr.idx_rm || instr.idx_rd == instr.idx_rm + 1) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); auto addr = ctx.cpu.FetchReg(instr.idx_rn); if 
(PrepareExclusiveStore(ctx, addr)) { WriteVirtualMemory(ctx, addr , ctx.cpu.FetchReg(instr.idx_rm )); WriteVirtualMemory(ctx, addr + 4, ctx.cpu.FetchReg(instr.idx_rm + 1)); ctx.cpu.reg[instr.idx_rd] = 0; } else { // Not in exclusive state => Not updating memory ctx.cpu.reg[instr.idx_rd] = 1; } return NextInstr(ctx); } // Count Leading Zeros static uint32_t HandlerClz(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable if (instr.idx_rm == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); auto rm = ctx.cpu.FetchReg(instr.idx_rm); ctx.cpu.reg[instr.idx_rd] = 32; while (rm != 0) { rm >>= 1; --ctx.cpu.reg[instr.idx_rd]; } return NextInstr(ctx); } static uint32_t Handler0001(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // TODO: Find a cool way to handle these masks in a unified way and compile-time asserting that they are non-ambiguous if ((instr.raw & 0b1111'1111'1111'0000'1111'1111'0000) == 0b0001'0110'1111'0000'1111'0001'0000) { return HandlerClz(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0001'1000'0000'0000'1111'1001'0000) { return HandlerStrex(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'1111) == 0b0001'1001'0000'0000'1111'1001'1111) { return HandlerLdrex(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0001'1010'0000'0000'1111'1001'0000) { return HandlerStrexd(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'1111) == 0b0001'1011'0000'0000'1111'1001'1111) { return HandlerLdrexd(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0001'1100'0000'0000'1111'1001'0000) { return HandlerStrexb(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'1111) == 0b0001'1101'0000'0000'1111'1001'1111) { return HandlerLdrexb(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 
0b0001'1110'0000'0000'1111'1001'0000) {
        return HandlerStrexh(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'1111) == 0b0001'1111'0000'0000'1111'1001'1111) {
        return HandlerLdrexh(ctx, instr);
    } else if ((instr.raw & 0b1111'0100'0000'0000'1111'1001'0000) == 0b0001'0000'0000'0000'0000'1001'0000) {
        throw std::runtime_error("Should not be hit anymore with new dispatcher");
    } else if ((instr.raw & 0b1111'0100'0000'0000'0000'1001'0000) == 0b0001'0100'0000'0000'0000'1001'0000) {
        throw std::runtime_error("Should not be hit anymore with new dispatcher");
    } else if ((instr.identifier_4_23 & ~0b10) == 0b0010'1111'1111'1111'0001) {
        // Bit 1 of identifier_4_23 is ignored by the comparison above and
        // selects between the two variants below.
        // NOTE(review): both calls read identically in the reviewed text;
        // presumably they differ in an explicit template argument - confirm.
        if (instr.identifier_4_23 & 0b10) {
            return HandlerBranchExchange(ctx, instr);
        } else {
            return HandlerBranchExchange(ctx, instr);
        }
    } else if ((instr.raw & 0b1111'1111'0001'1111'1110'0010'0000) == 0b0001'0000'0000'0000'0000'0000'0000) {
        return HandlerCPS(ctx, instr);
    } else if ((instr.raw & 0b1111'1011'1111'0000'1111'1111'1111) == 0b0001'0000'1111'0000'0000'0000'0000) {
        return HandlerMRS(ctx, instr);
    } else if ((instr.raw & 0b1111'1011'0000'1111'1111'1111'0000) == 0b0001'0010'0000'1111'0000'0000'0000) {
        return HandlerMSR(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0001'0101'0000'0000'0000'0000'0000) {
        return HandlerCmp(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0001'0111'0000'0000'0000'0000'0000) {
        return HandlerCmn(ctx, instr);
    } else if ((instr.raw & 0b1111'1110'1111'0000'0000'0000'0000) == 0b0001'1010'0000'0000'0000'0000'0000) {
        return HandlerMov(ctx, instr);
    } else if ((instr.raw & 0b1111'1110'1111'0000'0000'0000'0000) == 0b0001'1110'0000'0000'0000'0000'0000) {
        return HandlerMvn(ctx, instr);
    } else if ((instr.raw & 0b1111'1110'0000'0000'0000'0000'0000) == 0b0001'1000'0000'0000'0000'0000'0000) {
        return HandlerOrr(ctx, instr);
    } else if ((instr.raw & 0b1111'1110'0000'0000'0000'0000'0000) == 0b0001'1100'0000'0000'0000'0000'0000) {
        return HandlerBic(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0001'0001'0000'0000'0000'0000'0000) {
        return HandlerTst(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0001'0011'0000'0000'0000'0000'0000) {
        return HandlerTeq(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0001'0010'0000'0000'0000'0000'0000) {
        // Technically UNPREDICTABLE due to missing S flag, but Luigi's Mansion 2 uses this, and my guess is it's not a NOP, so let's just do a plain TEQ...
        return HandlerTeq(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'0000'0000'1001'0000) == 0b0001'0000'0000'0000'0000'1000'0000) {
        return HandlerSmlaxx(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'1001'0000) == 0b0001'0110'0000'0000'0000'1000'0000) {
        return HandlerSmulxx(ctx, instr);
    } else {
        // No pattern matched
        return HandlerStub(ctx, instr);
    }
}

// Decodes the instruction by comparing masked bits of instr.raw against each
// known pattern in turn and forwards to the handler of the first match.
// Encodings that match none of the patterns are passed to HandlerStub.
static uint32_t Handler0011(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) {
    if ((instr.raw & 0b1111'1110'1111'0000'0000'0000'0000) == 0b0011'1010'0000'0000'0000'0000'0000) {
        return HandlerMov(ctx, instr);
    } else if ((instr.raw & 0b1111'1110'1111'0000'0000'0000'0000) == 0b0011'1110'0000'0000'0000'0000'0000) {
        return HandlerMvn(ctx, instr);
    } else if ((instr.raw & 0b1111'1110'0000'0000'0000'0000'0000) == 0b0011'1000'0000'0000'0000'0000'0000) {
        return HandlerOrr(ctx, instr);
    } else if ((instr.raw & 0b1111'1110'0000'0000'0000'0000'0000) == 0b0011'1100'0000'0000'0000'0000'0000) {
        return HandlerBic(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0011'0001'0000'0000'0000'0000'0000) {
        return HandlerTst(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0011'0011'0000'0000'0000'0000'0000) {
        return HandlerTeq(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0011'0010'0000'0000'0000'0000'0000) {
        // Technically UNPREDICTABLE due to missing S flag, but Luigi's Mansion 2 uses this, and my guess is it's not a
        // NOP, so let's just do a plain TEQ...
        return HandlerTeq(ctx, instr);
    } else if ((instr.raw & 0b1111'1011'0000'1111'0000'0000'0000) == 0b0011'0010'0000'1111'0000'0000'0000) {
        return HandlerMSR(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0011'0101'0000'0000'0000'0000'0000) {
        return HandlerCmp(ctx, instr);
    } else if ((instr.raw & 0b1111'1111'0000'1111'0000'0000'0000) == 0b0011'0111'0000'0000'0000'0000'0000) {
        return HandlerCmn(ctx, instr);
    } else {
        // No pattern matched
        return HandlerStub(ctx, instr);
    }
}

// Single-register load/store with an immediate or (shifted) register offset.
// The body branches on `store` and `byte_access`, which are not declared
// inside the function.
// NOTE(review): the template parameter list is not visible in the reviewed
// text; presumably it declares `store` and `byte_access` - confirm.
template static uint32_t HandlerMemoryAccess(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) {
    if (!EvalCond(ctx, instr.cond))
        return NextInstr(ctx);

    // P=0: Memory access using base register; after the access, the base register has the offset applied to it (post-indexed addressing)
    // P=0, W=0: normal memory access (LDR, LDRB, STR, STRB) using base register
    // P=0, W=1: unprivileged memory access (LDRBT, LDRT, STRBT, STRT)
    // P=1, W=0: memory access using base register with applied offset (base register remains unchanged).
    // P=1, W=1: memory access using base register with applied offset (base register will be updated).

    // Not actually a memory access instruction in this case!
    if (instr.ldr_I && (instr.raw & 0x10) != 0)
        return HandlerStubAnnotated(ctx, instr, __LINE__);

    if (instr.idx_rn == instr.idx_rd && (!instr.ldr_P || instr.ldr_W)) {
        // Unknown instruction behavior for Rd == Rn: Which of the register writes has higher priority?
        return HandlerStubAnnotated(ctx, instr, __LINE__);
    }

    uint32_t base = ctx.cpu.FetchReg(instr.idx_rn);

    // lazy address offset - TODO: Catch exceptions! (or better not use exceptions here at all!)
    // Only evaluated for the register-offset form (ldr_I set); the immediate form uses ldr_offset directly.
    auto addr_offset = [&]{return CalcShifterOperandFromImmediate(ctx.cpu.FetchReg(instr.idx_rm), instr.ldr_shift_imm, instr.ldr_shift, ctx.cpu.cpsr.carry).value().value; };
    // ldr_U selects whether the offset is added to or subtracted from the base
    uint32_t offset = (instr.ldr_U ? 1 : -1) * ((instr.ldr_I) ?
addr_offset() : instr.ldr_offset.Value());

    // Pre-indexed forms apply the offset before the access
    if (instr.ldr_P)
        base += offset;

    if (store) {
        // Store memory
        // NOTE(review): both calls read identically in the reviewed text;
        // presumably they differ in the explicit access width - confirm.
        if (byte_access) {
            WriteVirtualMemory(ctx, base, ctx.cpu.FetchReg(instr.idx_rd));
        } else {
            WriteVirtualMemory(ctx, base, ctx.cpu.FetchReg(instr.idx_rd));
        }

        // TODO: Magic for shared memory??
    } else {
        // Load memory
        // NOTE(review): both reads look identical here as well - see above.
        uint32_t value = byte_access ? ReadVirtualMemory(ctx, base) : ReadVirtualMemory(ctx, base);

        // When loading to PC, clear bit0 to 0 and copy its old value to the thumb field
        if (instr.idx_rd != 15) {
            ctx.cpu.reg[instr.idx_rd] = value;
        } else {
            // Switch to Thumb mode if the LSB of the loaded value is set, but
            // if it isn't then make sure we are not branching to a non-word
            // aligned address (since that is UNPREDICTABLE in ARM mode).
            if ((value & 3) == 0b10)
                return HandlerStubAnnotated(ctx, instr, __LINE__);

            ctx.cpu.cpsr.thumb = value & 1;
            ctx.cpu.reg[instr.idx_rd] = value & 0xFFFFFFFE;
        }
    }

    // Base register writeback: always for post-indexed forms (P=0), and for
    // pre-indexed forms only when W is set
    if (!instr.ldr_P || instr.ldr_W) {
        // Post-indexed forms apply the offset only now, after the access
        if (!instr.ldr_P)
            base += offset;

        ctx.cpu.reg[instr.idx_rn] = base;

        if (instr.idx_rn == 15) {
            // TODO: Unknown behavior for PC
            return HandlerStubAnnotated(ctx, instr, __LINE__);
        }
    }

    // A load into PC acts as a branch: continue at the loaded address
    if (!store && instr.idx_rd == 15) {
        return ctx.cpu.reg[15];
    } else {
        return NextInstr(ctx);
    }
}

// Load/store multiple registers. The body branches on `Load`, which is not
// declared inside the function.
// NOTE(review): the template parameter list is not visible in the reviewed
// text; presumably it declares `Load` - confirm.
template static uint32_t HandlerLDM_STM(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) {
    if (!EvalCond(ctx, instr.cond))
        return NextInstr(ctx);

    // Unpredictable
    if (instr.addr4_registers == 0)
        return HandlerStubAnnotated(ctx, instr, __LINE__);

    // TODO: if L and S and User/System mode: Unpredictable
    if (Load && instr.addr4_S && !ctx.cpu.HasSPSR())
        return HandlerStubAnnotated(ctx, instr, __LINE__);

    // TODO: Should always start at the smallest address
    uint32_t addr = ctx.cpu.FetchReg(instr.idx_rn);

    // Number of bits set in the 16-bit register list
    uint32_t registers_accessed = [&]{
        uint32_t ret = 0;
        for (unsigned i = 0; i < 16; ++i)
            ret += ((instr.addr4_registers >> i) & 1);
        return ret;
    }();

    // NOTE: Registers are always accessed starting from the lowest address, regardless of whether we are increasing
    // or decreasing.
    // addr2 is the lowest address touched by the transfer and walks upwards in
    // the loop below; addr tracks the value used for base register writeback.
    uint32_t addr2 = addr - ((instr.addr4_U ? 0 : 1) * 4 * (registers_accessed - 1)) + ((instr.addr4_U ? 4 : -4) * instr.addr4_P);

    uint32_t next_pc = NextInstr(ctx);

    for (unsigned i = 0; i < 16; ++i) {
        // Skip registers that are not part of the register list
        if (((instr.addr4_registers >> i) & 1) == 0)
            continue;

        if (instr.addr4_P)
            addr += 4 * (instr.addr4_U ? 1 : -1);

        if (i == ARM::Regs::PC) {
            if (Load) {
                // Loading PC branches to the loaded address; bit 0 selects Thumb state
                auto val = ReadVirtualMemory(ctx, addr2);
                next_pc = val & ~1;
                ctx.cpu.cpsr.thumb = val & 1;

                // Move SPSR to CPSR
                if (instr.addr4_S) {
                    ctx.cpu.ReplaceCPSR(ctx.cpu.GetSPSR(ctx.cpu.cpsr.mode));
                }

                ctx.cfl.Return(ctx, "pop");
            } else {
                // NOTE: The stored value is ImplementationDefined!
                WriteVirtualMemory(ctx, addr2, ctx.cpu.FetchReg(i));
            }
        } else {
            if (Load) {
                ctx.cpu.reg[i] = ReadVirtualMemory(ctx, addr2);
            } else {
                // if !Load and in privileged mode, use user mode banked registers instead
                // NOTE(review): `!Load` is always true in this branch, and the
                // condition does not look at instr.addr4_S - confirm that
                // user-bank stores are intended for every privileged STM.
                if (i >= 8 && !Load && ctx.cpu.InPrivilegedMode()) {
                    WriteVirtualMemory(ctx, addr2, ctx.cpu.banked_regs_user[i-8]);
                } else {
                    WriteVirtualMemory(ctx, addr2, ctx.cpu.FetchReg(i));
                }
            }
        }

        // TODO: Compute the final addr value statically rather than updating it each time in this loop!
        if (!instr.addr4_P)
            addr += 4 * (instr.addr4_U ?
1 : -1); addr2 += 4; } if (instr.addr4_W) ctx.cpu.reg[instr.idx_rn] = addr; return next_pc; } // unsigned 8 bit additions static uint32_t HandlerUadd8(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable configuration if (instr.idx_rn == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); auto val_rn = ctx.cpu.FetchReg(instr.idx_rn); auto val_rm = ctx.cpu.FetchReg(instr.idx_rm); uint32_t result = 0; result |= static_cast((val_rn & 0xFF) + (val_rm & 0xFF)); result |= static_cast((val_rn & 0xFF00) + (val_rm & 0xFF00)) << 8; result |= static_cast((val_rn & 0xFF0000) + (val_rm & 0xFF0000)) << 16; result |= static_cast((val_rn & 0xFF000000) + (val_rm & 0xFF000000)) << 24; ctx.cpu.reg[instr.idx_rd] = result; ctx.cpu.cpsr.ge0 = GetCarryT(val_rn, val_rm); ctx.cpu.cpsr.ge1 = GetCarryT(val_rn >> 8, val_rm >> 8); ctx.cpu.cpsr.ge2 = GetCarryT(val_rn >> 16, val_rm >> 16); ctx.cpu.cpsr.ge3 = GetCarryT(val_rn >> 24, val_rm >> 24); return NextInstr(ctx); } // saturated 8 bit additions static uint32_t HandlerUqadd8(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable configuration if (instr.idx_rn == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); auto val_rn = ctx.cpu.FetchReg(instr.idx_rn); auto val_rm = ctx.cpu.FetchReg(instr.idx_rm); uint32_t result = 0; result |= std::min(uint32_t { 255 }, (val_rn + val_rm) & 0xFF); result |= std::min(uint32_t { 255 }, ((val_rn >> 8u) + (val_rm >> 8u)) & 0xFF) << 8; result |= std::min(uint32_t { 255 }, ((val_rn >> 16u) + (val_rm >> 16u)) & 0xFF) << 16; result |= std::min(uint32_t { 255 }, ((val_rn >> 24u) + (val_rm >> 24u)) & 0xFF) << 24; ctx.cpu.reg[instr.idx_rd] = result; return NextInstr(ctx); } // unsigned 8 bit additions (halfed) static uint32_t 
HandlerUhadd8(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable configuration if (instr.idx_rn == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); auto val_rn = ctx.cpu.FetchReg(instr.idx_rn); auto val_rm = ctx.cpu.FetchReg(instr.idx_rm); uint32_t result = 0; result |= static_cast(((val_rn & 0xFF) + (val_rm & 0xFF)) / 2); result |= static_cast(((val_rn & 0xFF00) + (val_rm & 0xFF00)) / 2) << 8; result |= static_cast(((val_rn & 0xFF0000) + (val_rm & 0xFF0000)) / 2) << 16; result |= static_cast(((val_rn & 0xFF000000) + (val_rm & 0xFF000000)) / 2) << 24; ctx.cpu.reg[instr.idx_rd] = result; return NextInstr(ctx); } // 8 bit subtractions template static uint32_t HandlerXsub8(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable configuration if (instr.idx_rn == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); auto val_rn = ctx.cpu.FetchReg(instr.idx_rn); auto val_rm = ctx.cpu.FetchReg(instr.idx_rm); uint32_t result = 0; result |= static_cast((val_rn & 0xFF) - (val_rm & 0xFF)); result |= static_cast((val_rn & 0xFF00) - (val_rm & 0xFF00)) << 8; result |= static_cast((val_rn & 0xFF0000) - (val_rm & 0xFF0000)) << 16; result |= static_cast((val_rn & 0xFF000000) - (val_rm & 0xFF000000)) << 24; ctx.cpu.reg[instr.idx_rd] = result; using ComparisonType = std::conditional_t; ctx.cpu.cpsr.ge0 = static_cast(val_rn >> 0) >= static_cast(val_rm >> 0); ctx.cpu.cpsr.ge0 = static_cast(val_rn >> 8) >= static_cast(val_rm >> 8); ctx.cpu.cpsr.ge0 = static_cast(val_rn >> 16) >= static_cast(val_rm >> 16); ctx.cpu.cpsr.ge0 = static_cast(val_rn >> 24) >= static_cast(val_rm >> 24); return NextInstr(ctx); } // saturated signed 8 bit subtractions (-128...127) static uint32_t HandlerQsub8(CPUContext& ctx, ARM::ARMInstr 
instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable configuration if (instr.idx_rn == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); auto val_rn = ctx.cpu.FetchReg(instr.idx_rn); auto val_rm = ctx.cpu.FetchReg(instr.idx_rm); uint32_t result = 0; if ((val_rn & 0xFF) + 0x80 >= (val_rm & 0xFF)) result |= ((val_rn & 0xFF) - (val_rm & 0xFF)) & 0xFF; if (((val_rn >> 8) & 0xFF) + 0x80 >= ((val_rm >> 8) & 0xFF)) result |= ((val_rn & 0xFF00) - (val_rm & 0xFF00)) & 0xFF00; if (((val_rn >> 16) & 0xFF) + 0x80 >= ((val_rm >> 16) & 0xFF)) result |= ((val_rn & 0xFF0000) - (val_rm& 0xFF0000)) & 0xFF0000; if ((val_rn >> 24) + 0x80 >= (val_rm >> 24)) result |= ((val_rn & 0xFF000000) - (val_rm& 0xFF000000)) & 0xFF000000; ctx.cpu.reg[instr.idx_rd] = result; return NextInstr(ctx); } static uint32_t HandlerUqsub8(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable configuration if (instr.idx_rn == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); auto val_rn = ctx.cpu.FetchReg(instr.idx_rn); auto val_rm = ctx.cpu.FetchReg(instr.idx_rm); uint32_t result = 0; if ((val_rn & 0xFF) >= (val_rm & 0xFF)) result |= ((val_rn & 0xFF) - (val_rm & 0xFF)); if ((val_rn & 0xFF00) >= (val_rm & 0xFF00)) result |= ((val_rn & 0xFF00) - (val_rm & 0xFF00)); if ((val_rn & 0xFF0000) >= (val_rm & 0xFF0000)) result |= ((val_rn & 0xFF0000) - (val_rm & 0xFF0000)); if ((val_rn & 0xFF000000) >= (val_rm & 0xFF000000)) result |= ((val_rn & 0xFF000000) - (val_rm & 0xFF000000)); ctx.cpu.reg[instr.idx_rd] = result; return NextInstr(ctx); } static uint32_t HandlerSel(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unpredictable configuration if (instr.idx_rn == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC || 
instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); uint32_t result = 0; result |= 0xFF & (ctx.cpu.cpsr.ge0 ? ctx.cpu.reg[instr.idx_rn] : ctx.cpu.reg[instr.idx_rm]); result |= 0xFF00 & (ctx.cpu.cpsr.ge1 ? ctx.cpu.reg[instr.idx_rn] : ctx.cpu.reg[instr.idx_rm]); result |= 0xFF0000 & (ctx.cpu.cpsr.ge2 ? ctx.cpu.reg[instr.idx_rn] : ctx.cpu.reg[instr.idx_rm]); result |= 0xFF000000 & (ctx.cpu.cpsr.ge3 ? ctx.cpu.reg[instr.idx_rn] : ctx.cpu.reg[instr.idx_rm]); ctx.cpu.reg[instr.idx_rd] = result; return NextInstr(ctx); } // Unsigned Saturate static uint32_t HandlerUsat(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); auto operand = CalcShifterOperand(ctx.cpu.FetchReg(instr.idx_rm), instr.addr1_shift_imm, instr.addr1_shift, 0)->value; uint32_t max_value = (UINT32_C(1) << instr.sat_imm) - 1; ctx.cpu.reg[instr.idx_rd] = boost::algorithm::clamp(operand, 0, max_value); if (ctx.cpu.reg[instr.idx_rd] != operand) ctx.cpu.cpsr.q = 1; return NextInstr(ctx); } // Signed Saturate static uint32_t HandlerSsat(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); auto operand = CalcShifterOperand(ctx.cpu.FetchReg(instr.idx_rm), instr.addr1_shift_imm, instr.addr1_shift, 0)->value; int32_t min_value = -(int32_t { 1 } << instr.sat_imm); int32_t max_value = (int32_t { 1 } << instr.sat_imm) - 1; ctx.cpu.reg[instr.idx_rd] = boost::algorithm::clamp(operand, min_value, max_value); if (ctx.cpu.reg[instr.idx_rd] != operand) ctx.cpu.cpsr.q = 1; return NextInstr(ctx); } // Extract two bytes and repack them as two zero-extended half-words static uint32_t HandlerUxtb16(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) 
return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); ctx.cpu.reg[instr.idx_rd] = RotateRight(ctx.cpu.FetchReg(instr.idx_rm), 8 * instr.uxtb_rotate) & 0xFF00FF; return NextInstr(ctx); } // Extract a byte value from a register static uint32_t HandlerUxtb(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); ctx.cpu.reg[instr.idx_rd] = RotateRight(ctx.cpu.FetchReg(instr.idx_rm), 8 * instr.uxtb_rotate) & 0xFF; return NextInstr(ctx); } static uint32_t HandlerUxtab(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == ARM::Regs::PC || instr.idx_rm == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); if (instr.idx_rn == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "UXTB incorrectly recognized as UXTAB!"); uint32_t operand = RotateRight(ctx.cpu.FetchReg(instr.idx_rm), 8 * instr.uxtb_rotate) & 0xFF; ctx.cpu.reg[instr.idx_rd] = ctx.cpu.reg[instr.idx_rn] + operand; return NextInstr(ctx); } // Signed eXTract Byte static uint32_t HandlerSxtb(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // Compute result of rotation and then cast to int8_t to get proper sign extension ctx.cpu.reg[instr.idx_rd] = static_cast(RotateRight(ctx.cpu.FetchReg(instr.idx_rm), 8 * instr.uxtb_rotate) & 0xFF); return NextInstr(ctx); } static uint32_t HandlerUxtah(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return 
HandlerStubAnnotated(ctx, instr, __LINE__); uint32_t offset = ctx.cpu.reg[instr.idx_rn]; if (instr.idx_rn == 15) { // This case is actually an UXTH instruction, so drop the offset offset = 0; } ctx.cpu.reg[instr.idx_rd] = offset + (RotateRight(ctx.cpu.FetchReg(instr.idx_rm), 8 * instr.uxtb_rotate) & 0xFFFF); return NextInstr(ctx); } static uint32_t HandlerSxtab(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // NOTE: This case is actually an SXTB instruction if (instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); // Rotate and sign extend from 8 to 32 bits, and add the result to Rn ctx.cpu.reg[instr.idx_rd] = ctx.cpu.reg[instr.idx_rn] + (int8_t)(uint8_t)(RotateRight(ctx.cpu.FetchReg(instr.idx_rm), 8 * instr.uxtb_rotate)); return NextInstr(ctx); } static uint32_t HandlerSxtah(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); uint32_t offset = ctx.cpu.reg[instr.idx_rn]; if (instr.idx_rn == 15) { // This case is actually an SXTH instruction, so drop the offset offset = 0; } // Rotate and sign extend from 16 to 32 bits, and add the result to Rn ctx.cpu.reg[instr.idx_rd] = offset + (int16_t)(uint16_t)(RotateRight(ctx.cpu.FetchReg(instr.idx_rm), 8 * instr.uxtb_rotate)); return NextInstr(ctx); } // PacK Halfword Bottom Top static uint32_t HandlerPkhbt(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15 || instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); ctx.cpu.reg[instr.idx_rd] = ctx.cpu.reg[instr.idx_rn] & 0xFFFF; ctx.cpu.reg[instr.idx_rd] |= (ctx.cpu.reg[instr.idx_rm] << 
instr.addr1_shift_imm) & 0xFFFF0000; return NextInstr(ctx); } // PacK Halfword Top Bottom static uint32_t HandlerPkhtb(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15 || instr.idx_rn == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); ctx.cpu.reg[instr.idx_rd] = 0; // shift_imm=0 encodes a 32-bit shift, hence only the sign of rm is relevant if (instr.addr1_shift_imm == 0) { ctx.cpu.reg[instr.idx_rd] |= (reinterpret_cast(ctx.cpu.reg[instr.idx_rm]) >> 31) & 0xFFFF; } else { ctx.cpu.reg[instr.idx_rd] |= ArithmeticShiftRight(ctx.cpu.reg[instr.idx_rm], instr.addr1_shift_imm); } ctx.cpu.reg[instr.idx_rd] |= ctx.cpu.reg[instr.idx_rn] & 0xFFFF0000; return NextInstr(ctx); } // Byte-Reverse Word static uint32_t HandlerRev(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); const uint32_t bytes[] = { ctx.cpu.reg[instr.idx_rm] & 0xFF, (ctx.cpu.reg[instr.idx_rm] >> 8) & 0xFF, (ctx.cpu.reg[instr.idx_rm] >> 16) & 0xFF, (ctx.cpu.reg[instr.idx_rm] >> 24) & 0xFF }; ctx.cpu.reg[instr.idx_rd] = bytes[3]; ctx.cpu.reg[instr.idx_rd] |= bytes[2] << 8; ctx.cpu.reg[instr.idx_rd] |= bytes[1] << 16; ctx.cpu.reg[instr.idx_rd] |= bytes[0] << 24; return NextInstr(ctx); } // Byte-Reverse Packed Halfword static uint32_t HandlerRev16(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Using PC is Unpredictable if (instr.idx_rd == 15 || instr.idx_rm == 15) return HandlerStubAnnotated(ctx, instr, __LINE__); const uint32_t bytes[] = { ctx.cpu.reg[instr.idx_rm] & 0xFF, (ctx.cpu.reg[instr.idx_rm] >> 8) & 0xFF, (ctx.cpu.reg[instr.idx_rm] >> 16) & 0xFF, (ctx.cpu.reg[instr.idx_rm] >> 24) & 0xFF }; ctx.cpu.reg[instr.idx_rd] = (bytes[1] | (bytes[0] << 8)); 
ctx.cpu.reg[instr.idx_rd] |= (bytes[3] | (bytes[2] << 8)) << 16; return NextInstr(ctx); } static uint32_t Handler01xx(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (instr.cond == 0xF) { // TODO: Isn't this handled before the table lookup now? if ((instr.raw & 0b1101'0111'0000'1111'0000'0000'0000) == 0b0101'0101'0000'1111'0000'0000'0000) { // PLD - Preload Data // This is just a hint about memory access, hence we don't need to emulate it. return NextInstr(ctx); } // Otherwise, this is an unknown instruction return HandlerStubAnnotated(ctx, instr, __LINE__); } if ((instr.raw & 0b1111'1110'0000'0000'0000'0011'0000) == 0b0110'1110'0000'0000'0000'0001'0000) { return HandlerUsat(ctx, instr); } else if ((instr.raw & 0b1111'1111'1111'0000'0011'1111'0000) == 0b0110'1100'1111'0000'0000'0111'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'1111'0000'0011'1111'0000) == 0b0110'1110'1111'0000'0000'0111'0000) { return HandlerUxtb(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'0011'1111'0000) == 0b0110'1110'0000'0000'0000'0111'0000) { return HandlerUxtab(ctx, instr); } else if ((instr.raw & 0b1111'1111'1111'0000'0011'1111'0000) == 0b0110'1111'1111'0000'0000'0111'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'1111'0000'0011'1111'0000) == 0b0110'1010'1111'0000'0000'0111'0000) { return HandlerSxtb(ctx, instr); } else if ((instr.raw & 0b1111'1111'1111'0000'0011'1111'0000) == 0b0110'1011'1111'0000'0000'0111'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'0000'0000'0011'1111'0000) == 0b0110'1010'0000'0000'0000'0111'0000) { return HandlerSxtab(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'0011'1111'0000) == 0b0110'1011'0000'0000'0000'0111'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if 
((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0110'0101'0000'0000'1111'1001'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0110'0110'0000'0000'1111'1001'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0110'0111'0000'0000'1111'1001'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0110'0001'0000'0000'1111'1111'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0110'0101'0000'0000'1111'1111'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0110'0110'0000'0000'1111'1111'0000) { throw std::runtime_error("Should not be hit anymore with new dispatcher"); } else if ((instr.raw & 0b1111'1111'0000'0000'1111'1111'0000) == 0b0110'1000'0000'0000'1111'1011'0000) { return HandlerSel(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'0000'0111'0000) == 0b0110'1000'0000'0000'0000'0001'0000) { return HandlerPkhbt(ctx, instr); } else if ((instr.raw & 0b1111'1111'0000'0000'0000'0111'0000) == 0b0110'1000'0000'0000'0000'0101'0000) { return HandlerPkhtb(ctx, instr); } else if ((instr.raw & 0b1111'1111'1111'0000'1111'1111'0000) == 0b0110'1011'1111'0000'1111'0011'0000) { return HandlerRev(ctx, instr); } else if ((instr.raw & 0b1111'1111'1111'0000'1111'1111'0000) == 0b0110'1011'1111'0000'1111'1011'0000) { return HandlerRev16(ctx, instr); } else { return HandlerStubAnnotated(ctx, instr, __LINE__); } } static void LoadOrStoreFloat(InterpreterExecutionContext& ctx, uint32_t address, float& reg_value, bool load) { if (load) { auto value = ReadVirtualMemory(ctx, address); 
memcpy(®_value, &value, sizeof(value)); } else { uint32_t value = 0; memcpy(&value, ®_value, sizeof(value)); WriteVirtualMemory(ctx, address, value); } } static uint32_t LoadStoreFloatSingle(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) { return NextInstr(ctx); } const bool is_double = ViewBitField<8, 1, uint32_t>(instr.raw); // FLDD/FLDS/FSTD/FSTS: Single register, no writeback unsigned idx_fd = (instr.idx_rd << 1) | instr.addr5_D; uint32_t address = ctx.cpu.FetchReg(instr.idx_rn); if (instr.ldr_U) address += 4 * instr.addr5_offset; else address -= 4 * instr.addr5_offset; // TODO: May need fixing when big-endian mode support is added for (unsigned reg = 0; reg < (is_double ? 2 : 1); ++reg) LoadOrStoreFloat(ctx, address + 4 * reg, ctx.cpu.fpreg[idx_fd + reg], instr.addr4_L); return NextInstr(ctx); } static uint32_t LoadStoreFloatMultiple(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) { return NextInstr(ctx); } const bool is_double = ViewBitField<8, 1, uint32_t>(instr.raw); // TODO: Should always start at the smallest address uint32_t start_addr = ctx.cpu.FetchReg(instr.idx_rn); if (instr.ldr_P && !instr.ldr_U && instr.ldr_W) start_addr -= 4 * instr.addr5_offset; uint32_t word_count = instr.addr5_offset - (is_double && (instr.addr5_offset & 1)); uint32_t updated_rn = ctx.cpu.FetchReg(instr.idx_rn); if(!instr.ldr_P && instr.ldr_U && instr.ldr_W) updated_rn += 4 * instr.addr5_offset; if(instr.ldr_P && !instr.ldr_U && instr.ldr_W) updated_rn -= 4 * instr.addr5_offset; // FLDMD: offset&1 must be 0 (?); offset must be != 0; d + offset/2 must be <=32 // FLDMD: offset&1 must be 0 (?); offset must be != 0; d + offset must be <=32 unsigned idx_fd = (instr.idx_rd << 1) | instr.addr5_D; for (unsigned reg = 0; reg < word_count; ++reg) LoadOrStoreFloat(ctx, start_addr + 4 * reg, ctx.cpu.fpreg[idx_fd + reg], instr.addr4_L); // TODO: Assert that rn is not contained in the registers list if 
(instr.addr4_W) { ctx.cpu.reg[instr.idx_rn] = updated_rn; // Behavior unknown for PC if (instr.idx_rn == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); } return NextInstr(ctx); } static uint32_t Handler1101(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Handled via interpreter_dispatch_table instead return HandlerStubAnnotated(ctx, instr, __LINE__); } static uint32_t HandlerFMDRR(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Unknown behavior when PC is used if (instr.idx_rn == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); // TODO: Single precision is not implemented, assert for that! memcpy(&ctx.cpu.dpreg_raw[instr.idx_rm].raw_high, &ctx.cpu.reg[instr.idx_rn], sizeof(uint32_t)); memcpy(&ctx.cpu.dpreg_raw[instr.idx_rm].raw_low, &ctx.cpu.reg[instr.idx_rd], sizeof(uint32_t)); return NextInstr(ctx); } static uint32_t HandlerFMRDD(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // Move to two Registers from Double Precision // TODO: Single precision is not implemented, assert for that! 
if (instr.idx_rn == ARM::Regs::PC || instr.idx_rd == ARM::Regs::PC) return HandlerStubAnnotated(ctx, instr, __LINE__); memcpy(&ctx.cpu.reg[instr.idx_rn], &ctx.cpu.dpreg_raw[instr.idx_rm].raw_high, sizeof(uint32_t)); memcpy(&ctx.cpu.reg[instr.idx_rd], &ctx.cpu.dpreg_raw[instr.idx_rm].raw_low, sizeof(uint32_t)); return NextInstr(ctx); } static uint32_t Handler1100(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Handled via interpreter_dispatch_table instead return HandlerStubAnnotated(ctx, instr, __LINE__); } // Move to Register from Coprocessor static uint32_t HandlerMRC(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (instr.coproc_id != 15) return HandlerStubWithMessage(ctx, instr, "Only cp15 supported currently"); // TODO: At least furthermore recognize 5 (TLB) and 7 (Debug) if (instr.coproc_opcode1 != 0) return HandlerStubWithMessage(ctx, instr, "Only cp15 opcode1=0 supported currently"); uint32_t data = 0; switch ((instr.idx_rn << 8) | (instr.idx_rm << 4) | instr.coproc_opcode2) { // CPU ID register case 0x005: data = ctx.cpu.cp15.CPUId().raw; break; // Control Register case 0x100: data = ctx.cpu.cp15.Control().raw; break; // Auxiliary Control Register case 0x101: data = ctx.cpu.cp15.AuxiliaryControl().raw; break; case 0xd03: data = ctx.cpu.cp15.ThreadLocalStorage().virtual_addr; break; default: { std::stringstream ss; ss << "Unknown CRn/CRm/opcode2 combination: " << std::hex << instr.idx_rn << ", " << instr.idx_rm << ", " << instr.coproc_opcode2; return HandlerStubWithMessage(ctx, instr, ss.str()); } } if (instr.idx_rd == ARM::Regs::PC) { // TODO: // N = data[31] // Z = data[30] // C = data[29] // V = data[28] return HandlerStubWithMessage(ctx, instr, "Rd==PC not supported"); } else { ctx.cpu.reg[instr.idx_rd] = data; } return NextInstr(ctx); } // Move to Coprocessor from Register static uint32_t HandlerMCR(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); 
// Implemented according to Citra's CP15 code for now... TODO: Test all of this! if (instr.coproc_id != 15) return HandlerStubWithMessage(ctx, instr, "Only cp15 supported currently"); // TODO: At least furthermore recognize 5 (TLB) and 7 (Debug) if (instr.coproc_opcode1 != 0) return HandlerStubWithMessage(ctx, instr, "Only cp15 opcode1=0 supported currently"); if (instr.idx_rn == 0) return HandlerStubWithMessage(ctx, instr, "Rn=R0 is expected to be read-only"); if (instr.idx_rd == 15) return HandlerStubWithMessage(ctx, instr, "Rd=PC not supported currently"); switch ((instr.idx_rn << 8) | (instr.idx_rm << 4) | instr.coproc_opcode2) { // Control Register case 0x100: ctx.cpu.cp15.Control().raw = ctx.cpu.reg[instr.idx_rd]; break; // Auxiliary Control Register case 0x101: ctx.cpu.cp15.AuxiliaryControl().raw = ctx.cpu.reg[instr.idx_rd]; break; // Invalidate Entire Instruction Cache Register case 0x750: return HandlerSkip(ctx, instr, "No instruction cache emulation"); // Flush Prefetch Buffer Register case 0x754: return HandlerSkip(ctx, instr, "No prefetch buffer emulation"); // Invalidate Entire Data Cache Register case 0x760: return HandlerSkip(ctx, instr, "No data cache emulation"); // Data Synchronization Barrier Register case 0x7a4: return HandlerSkip(ctx, instr, "No data synchronization emulation"); // Data Memory Barrier Register case 0x7a5: return HandlerSkip(ctx, instr, "No data memory barrier emulation"); default: { std::stringstream ss; ss << "Unknown CRn/CRm/opcode2 combination: " << std::hex << instr.idx_rn << ", " << instr.idx_rm << ", " << instr.coproc_opcode2; return HandlerStubWithMessage(ctx, instr, ss.str()); } } return NextInstr(ctx); } // large integral numbers may not be representable accurately by 32-bit // floating point numbers. This function provides a safe way to clamp a // floating point number to the given integer range. 
template static IntType ClampToIntegerRange(FloatType value, IntType min, IntType max) { static_assert(std::is_floating_point::value, ""); static_assert(std::is_integral::value, ""); // Get largest floating point number within the given range FloatType min_float = std::nextafter(min, IntType(0)); // TODO: This returns the wrong value! for 0xffffff80, converting to float will yield 0x100000000... FloatType max_float = std::nextafter(max, IntType(0)); // TODO: Instead, should use nexttowardf(float{0x10000000}, 0.f) // auto max_float = std::nexttoward(FloatType { 0x100000000 }, static_cast(0)); // // static_assert(std::is_same_v); // Make sure we got the right overload for nexttoward if (value < min_float) return min; if (value > max_float) return max; return static_cast(value); } template static auto& GetVFPRegisters(CPUContext& ctx); template<> auto& GetVFPRegisters(CPUContext& ctx) { return ctx.cpu.fpreg; } template<> auto& GetVFPRegisters(CPUContext& ctx) { return ctx.cpu.dpreg; } template static uint32_t HandlerVFPDataProcessing(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); if (IsDouble) { // UNDEFINED: TODO: N != 1 && M != 1 only applies to some instructions. if (instr.addr5_D/* || instr.vfp_data_N || instr.vfp_data_M*/) { // return HandlerStubAnnotated(ctx, instr, __LINE__); } } auto idx_single_d = (instr.idx_rd << 1) | instr.addr5_D; auto idx_single_n = (instr.idx_rn << 1) | instr.vfp_data_N; auto idx_single_m = (instr.idx_rm << 1) | instr.vfp_data_M; auto idx_double_d = instr.idx_rd; auto idx_double_n = instr.idx_rn; auto idx_double_m = instr.idx_rm; auto idx_fd = IsDouble ? idx_double_d.Value() : idx_single_d; auto idx_fn = IsDouble ? idx_double_n.Value() : idx_single_n; auto idx_fm = IsDouble ? 
idx_double_m.Value() : idx_single_m; auto& regs = GetVFPRegisters(ctx); using RegType = std::remove_reference_t; static_assert(std::is_same::value || std::is_same::value, ""); if (!instr.vfp_data_opcode_p && !instr.vfp_data_opcode_q) { // FMAC/FNMAC/FMSC/FNMSC - multiply-accumulate-like if (!instr.vfp_data_opcode_r && !instr.vfp_data_opcode_s) { regs[idx_fd] = std::fma(regs[idx_fn], regs[idx_fm], regs[idx_fd]); } else if (!instr.vfp_data_opcode_r && instr.vfp_data_opcode_s) { regs[idx_fd] = std::fma(regs[idx_fn], -regs[idx_fm], regs[idx_fd]); } else if (instr.vfp_data_opcode_r && !instr.vfp_data_opcode_s) { regs[idx_fd] = std::fma(regs[idx_fn], regs[idx_fm], -regs[idx_fd]); } else { regs[idx_fd] = std::fma(regs[idx_fn], -regs[idx_fm], -regs[idx_fd]); } } else if (!instr.vfp_data_opcode_p && instr.vfp_data_opcode_q && instr.vfp_data_opcode_r) { // FSUB/FADD if (instr.vfp_data_opcode_s) regs[idx_fd] = regs[idx_fn] - regs[idx_fm]; else regs[idx_fd] = regs[idx_fn] + regs[idx_fm]; } else if (!instr.vfp_data_opcode_p && instr.vfp_data_opcode_q && !instr.vfp_data_opcode_r) { // FNMUL/FMUL if (instr.vfp_data_opcode_s) regs[idx_fd] = -regs[idx_fn] * regs[idx_fm]; else regs[idx_fd] = regs[idx_fn] * regs[idx_fm]; } else if (instr.vfp_data_opcode_p && !instr.vfp_data_opcode_q && !instr.vfp_data_opcode_r && !instr.vfp_data_opcode_s) { // FDIV regs[idx_fd] = regs[idx_fn] / regs[idx_fm]; } else if (instr.vfp_data_opcode_p && instr.vfp_data_opcode_q && instr.vfp_data_opcode_r && instr.vfp_data_opcode_s) { // Use extension opcode (given by idx_fn) switch (idx_single_n) { case 0b00000: // FCPY // TODO: Consider FPSCR.LEN regs[idx_fd] = regs[idx_fm]; break; case 0b00001: // FABS // TODO: Consider FPSCR.LEN regs[idx_fd] = (regs[idx_fm] > 0) ? 
regs[idx_fm] : (-regs[idx_fm]); break; case 0b00011: // FSQRT // TODO: Consider FPSCR.LEN // TODO: Consider rounding mode from FPSCR regs[idx_fd] = std::sqrt(regs[idx_fm]); break; case 0b01000: // FCMP case 0b01001: // FCMP { // FCMP(E) - Compare (with Exceptions on quiet NaNs) // TODO: Verify remainders of the instruction? // TODO: Raise exceptions if Sd or Sm are NaN // (mind the differences between FCMP and FCMPS though) ctx.cpu.fpscr.less = regs[idx_fd] < regs[idx_fm]; ctx.cpu.fpscr.equal = regs[idx_fd] == regs[idx_fm]; ctx.cpu.fpscr.greater_equal_unordered = !(regs[idx_fd] < regs[idx_fm]); // TODO: No idea whether this works as intended: // We here assume that if we are "greater_equal_unordered" but neither greater nor equal, then we are unordered ctx.cpu.fpscr.unordered = ctx.cpu.fpscr.greater_equal_unordered && !(regs[idx_fd] >= regs[idx_fm]); break; } case 0b01010: // FCMPZ case 0b01011: // FCMPEZ { // FCMP(E)Z - Compare (with Exceptions on quiet NaNs) with Zero // TODO: Verify remainders of the instruction? 
// TODO: Raise exceptions if Sd or Sm are NaN // (mind the differences between FCMPZ and FCMPSZ though) ctx.cpu.fpscr.less = regs[idx_fd] < 0.f; ctx.cpu.fpscr.equal = regs[idx_fd] == 0.f; ctx.cpu.fpscr.greater_equal_unordered = !(regs[idx_fd] < 0.f); // TODO: No idea whether this works as intended: // We here assume that if we are "greater_equal_unordered" but neither greater nor equal, then we are unordered ctx.cpu.fpscr.unordered = ctx.cpu.fpscr.greater_equal_unordered && !(regs[idx_fd] >= 0.f); break; } case 0b10000: { // FUITO - Unsigned Integer TO Single/Double // First, get the integer stored in the single-precision register uint32_t integer; memcpy(&integer, &ctx.cpu.fpreg[idx_single_m], sizeof(integer)); // Cast the integer to a single-/double-precision float regs[idx_fd] = static_cast(integer); break; } case 0b10001: { // FSITO - Signed Integer TO Single/Double // First, get the integer stored in the single-precision register int32_t integer; memcpy(&integer, &ctx.cpu.fpreg[idx_single_m], sizeof(integer)); // Cast the integer to a single-/double-precision float regs[idx_fd] = static_cast(integer); break; } case 0b11000: case 0b11001: { // FTOUI - Float TO Unsigned Integer // TODO: The lowest bit in the extended opcode defines that we should use RZ mode rather than the rounding mode given by FPSCR // TODO: If NaN, an invalid operation exception is raised and the result is zero if the exception is untrapped. auto value = ClampToIntegerRange(regs[idx_fm], 0, 0xFFFFFFFF); memcpy(&ctx.cpu.fpreg[idx_single_d], &value, sizeof(value)); break; } case 0b11010: case 0b11011: { // FTOSI - Float TO Signed Integer // TODO: The lowest bit in the extended opcode defines that we should use RZ mode rather than the rounding mode given by FPSCR // TODO: If NaN, an invalid operation exception is raised and the result is zero if the exception is untrapped. 
auto value = ClampToIntegerRange(regs[idx_fm], static_cast(0x80000000), 0x7FFFFFFF); memcpy(&ctx.cpu.fpreg[idx_single_d], &value, sizeof(value)); break; } case 0b00010: { // FNEG - Negate // TODO: Consider FPSCR.LEN regs[idx_fd] = -regs[idx_fm]; break; } case 0b01111: { // FCVT if (IsDouble) { // double -> single ctx.cpu.fpreg[idx_single_d] = ctx.cpu.dpreg[idx_double_m]; } else { // single -> double ctx.cpu.dpreg[idx_double_d] = ctx.cpu.fpreg[idx_single_m]; } break; } default: return HandlerStubAnnotated(ctx, instr, __LINE__); } } else { return HandlerStubAnnotated(ctx, instr, __LINE__); } return NextInstr(ctx); } static uint32_t HandlerVFPRegisterTransfer(CPUContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); bool is_double = ViewBitField<8, 1, uint32_t>(instr.raw); auto idx_fn = (instr.idx_rn << 1) | instr.vfp_data_N; switch ((instr.raw >> 20) & 0xF) { case 0b0000: { if (is_double) return HandlerStubWithMessage(ctx, instr, "Double code path not implemented"); // FMSR / FMDLR (Floating-point Move to Double-precision Low from Register) auto value = ctx.cpu.FetchReg(instr.idx_rd); memcpy(&ctx.cpu.fpreg[idx_fn], &value, sizeof(value)); break; } case 0b0001: { // FMRS - Move to Register if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); if (is_double) return HandlerStubWithMessage(ctx, instr, "Double code path not implemented"); uint32_t value; memcpy(&value, &ctx.cpu.fpreg[idx_fn], sizeof(value)); ctx.cpu.reg[instr.idx_rd] = value; break; } case 0b0010: { // FMDHR - Floating-point Move to Double-precision High from Register if (instr.idx_rd == ARM::Regs::PC) return HandlerStubWithMessage(ctx, instr, "Unpredictable configuration"); if (!is_double) return HandlerStubWithMessage(ctx, instr, "Unknown instruction"); // Load register into high part of the double register memcpy(&ctx.cpu.fpreg[idx_fn+1], &ctx.cpu.reg[instr.idx_rd], sizeof(uint32_t)); break; } case 0b1110: { // 
FMXR - Move to System Register // System register is determined by idx_fn if (is_double) return HandlerStubWithMessage(ctx, instr, "Double code path not implemented"); auto value = ctx.cpu.FetchReg(instr.idx_rd); if (idx_fn == 0b00010) { ctx.cpu.fpscr.raw = value; } else { // Unhandled system register return HandlerStubAnnotated(ctx, instr, __LINE__); } break; } case 0b1111: { // TODO: Check FMSTAT (Rd=15?) // FMRX - Move from System Register // (If Rd=15, this is referred to as FMSTAT) // System register is determined by idx_fn if (is_double) return HandlerStubWithMessage(ctx, instr, "Double code path not implemented"); uint32_t value; if (idx_fn == 0b00010) { value = ctx.cpu.fpscr.raw; } else { // Unhandled system register return HandlerStubAnnotated(ctx, instr, __LINE__); } // FMRX from FPSCR to the PC is actually an FMSTAT instruction if (idx_fn == 0b00010 && instr.idx_rd == ARM::Regs::PC) { // Copy condition flags from FPSCR to CPSR (discard other 28 bits) ctx.cpu.cpsr.neg = ctx.cpu.fpscr.less.Value(); ctx.cpu.cpsr.zero = ctx.cpu.fpscr.equal.Value(); ctx.cpu.cpsr.carry = ctx.cpu.fpscr.greater_equal_unordered.Value(); ctx.cpu.cpsr.overflow = ctx.cpu.fpscr.unordered.Value(); } else { // Otherwise, just copy the value ctx.cpu.reg[instr.idx_rd] = value; } break; } default: return HandlerStubAnnotated(ctx, instr, __LINE__); } return NextInstr(ctx); } static uint32_t Handler1110(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if ((instr.raw & 0b1111'0000'0000'0000'1111'0001'0000) == 0b1110'0000'0000'0000'1010'0000'0000) { return HandlerVFPDataProcessing(ctx, instr); } else if ((instr.raw & 0b1111'0000'0000'0000'1111'0001'0000) == 0b1110'0000'0000'0000'1011'0000'0000) { return HandlerVFPDataProcessing(ctx, instr); } else if ((instr.raw & 0b1111'0000'0000'0000'1110'0111'1111) == 0b1110'0000'0000'0000'1010'0001'0000) { return HandlerVFPRegisterTransfer(ctx, instr); } else if ((instr.raw & 0x100010) == 0x100010) { return HandlerMRC(ctx, instr); } else if 
((instr.raw & 0x100010) == 0x10) { return HandlerMCR(ctx, instr); } else { return HandlerStubWithMessage(ctx, instr, "Unknown 0b1110 instruction"); } } static uint32_t Handler100P(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Handled via interpreter_dispatch_table instead return HandlerStubAnnotated(ctx, instr, __LINE__); } // SWI/SVC - software interrupt / supervisor call static uint32_t HandlerSWI(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { if (!EvalCond(ctx, instr.cond)) return NextInstr(ctx); // TODO: Actually, we should be moving to supervisor mode before deferring to the OS // Reset monitor address to avoid thread tearing issues where an // LDREX-STREX pair is interrupted by a rescheduling system call ClearExclusive(ctx); ctx.cfl.SVC(ctx, instr.raw & 0xFFFFFF); auto* thread = ctx.os->active_thread; try { thread->YieldForSVC(instr.raw & 0xFFFFFF); } catch (HLE::OS::Thread* stopped_thread) { ctx.setup->os->SwitchToSchedulerFromThread(*thread); throw std::runtime_error("Attempted to resume stopped thread"); } return NextInstr(ctx); } static uint32_t Handler1111(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Handled via interpreter_dispatch_table instead return HandlerStubAnnotated(ctx, instr, __LINE__); } static InterpreterARMHandler handlers_arm_prim[] = { Handler00x0, Handler0001, Handler00x0, Handler0011, Handler01xx, Handler01xx, Handler01xx, Handler01xx, Handler100P, Handler100P, HandlerBranch, HandlerBranch, Handler1100, Handler1101, Handler1110, Handler1111 }; static_assert(sizeof(handlers_arm_prim) / sizeof(handlers_arm_prim[0]) == 16, "Must have exactly 16 primary ARM instruction handlers"); static uint32_t LegacyHandler(InterpreterExecutionContext& ctx, ARM::ARMInstr arminstr) { return handlers_arm_prim[arminstr.opcode_prim](ctx, arminstr); } using InterpreterARMHandlerForJIT = std::add_pointer::type; // TODO: Remove once all handlers have been changed to take an InterpreterExecutionContext argument template 
inline constexpr InterpreterARMHandlerForJIT Wrap = +[](ExecutionContext& ctx, ARM::ARMInstr instr) -> uint32_t { return F(static_cast(ctx), instr); }; InterpreterARMHandlerForJIT LookupHandler(ARM::Instr instr) { switch (instr) { case ARM::Instr::AND: return Wrap; case ARM::Instr::EOR: return Wrap; case ARM::Instr::SUB: return Wrap; case ARM::Instr::RSB: return Wrap; case ARM::Instr::ADD: return Wrap; case ARM::Instr::ADC: return Wrap; case ARM::Instr::SBC: return Wrap; case ARM::Instr::RSC: return Wrap; case ARM::Instr::TST: return Wrap; case ARM::Instr::TEQ: return Wrap; case ARM::Instr::CMP: return Wrap; case ARM::Instr::CMN: return Wrap; case ARM::Instr::ORR: return Wrap; case ARM::Instr::MOV: return Wrap; case ARM::Instr::BIC: return Wrap; case ARM::Instr::MVN: return Wrap; case ARM::Instr::MUL: return Wrap; case ARM::Instr::SSUB8: return Wrap>; case ARM::Instr::QSUB8: return Wrap; case ARM::Instr::UADD8: return Wrap; case ARM::Instr::USUB8: return Wrap>; case ARM::Instr::UQADD8: return Wrap; case ARM::Instr::UQSUB8: return Wrap; case ARM::Instr::UHADD8: return Wrap; case ARM::Instr::SSAT: return Wrap; case ARM::Instr::USAT: return Wrap; case ARM::Instr::SXTAH: return Wrap; case ARM::Instr::UXTB16: return Wrap; case ARM::Instr::UXTAH: return Wrap; case ARM::Instr::B: return Wrap>; case ARM::Instr::BL: return Wrap>; case ARM::Instr::BX: return Wrap>; // case ARM::Instr::BLX: return Wrap>; case ARM::Instr::LDR: return Wrap>; case ARM::Instr::LDRB: return Wrap>; case ARM::Instr::LDRH: return Wrap>; case ARM::Instr::LDRSH: return Wrap>; case ARM::Instr::LDRSB: return Wrap>; case ARM::Instr::LDRD: return Wrap>; case ARM::Instr::STR: return Wrap>; case ARM::Instr::STRB: return Wrap>; case ARM::Instr::STRH: return Wrap>; case ARM::Instr::STRD: return Wrap>; case ARM::Instr::LDM: return Wrap>; case ARM::Instr::STM: return Wrap>; case ARM::Instr::MSR: return Wrap; // case ARM::Instr::MRS: return Wrap; case ARM::Instr::VSTR: return Wrap; case ARM::Instr::VLDM: return 
Wrap; case ARM::Instr::VSTM: return Wrap; case ARM::Instr::VFP_S: return +[](ExecutionContext& ctx_, ARM::ARMInstr arminstr) -> uint32_t { auto& ctx = static_cast(ctx_); if (ViewBitField<4, 1, uint32_t>(arminstr.raw)) { return HandlerVFPRegisterTransfer(ctx, arminstr); } else { return HandlerVFPDataProcessing(ctx, arminstr); } }; case ARM::Instr::VFP_D: return +[](ExecutionContext& ctx_, ARM::ARMInstr arminstr) -> uint32_t { auto& ctx = static_cast(ctx_); if (ViewBitField<4, 1, uint32_t>(arminstr.raw)) { return HandlerVFPRegisterTransfer(ctx, arminstr); } else { return HandlerVFPDataProcessing(ctx, arminstr); } }; case ARM::Instr::MCRR_VFP: return Wrap; case ARM::Instr::MRRC_VFP: return Wrap; case ARM::Instr::SWI: return Wrap; default: return Wrap; } } static const auto default_dispatch_table = GenerateDispatchTable(LookupHandler, Wrap); static uint32_t HandlerStubThumb(CPUContext& ctx, ARM::ThumbInstr instr, const std::string& message) { std::stringstream err; err << "Unknown instruction 0x" << std::hex << std::setw(4) << std::setfill('0') << instr.raw; if (!message.empty()) err << ": " << message; throw std::runtime_error("Unknown Thumb instruction: " + err.str()); } // NOTE: Only use the return value of this function for instructions that never access the PC! template static uint32_t ForwardThumbToARM(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Forward the call to the ARM instruction: (void)(*interpreter_dispatch_table)[ARM::BuildDispatchTableKey(instr.raw)](ctx, instr); // Return current instruction + 2 for the next instruction return ctx.cpu.reg[15] + 2; } // Variant of ForwardThumbToARM. This function may be called for ARM instructions that read the PC: In this case, we make sure that the read returns the instruction address plus 4 instead of plus 8. // NOTE: Only use the return value of this function for instructions that never modify the PC! 
// NOTE(review): this region appears to have lost its line breaks and most
// angle-bracket template arguments (e.g. `static_cast(ctx_)`,
// `ReadPhysicalMemory(...)`, `std::make_unique(setup)`), so `//` comments now
// run into the code that follows them. Restore the text from the upstream file
// before compiling. The overview below describes only what is still visible.
//
// ForwardThumbToARMMayReadPC: remembers reg[15], subtracts 4 from it, runs the
//   ARM handler picked by ARM::BuildDispatchTableKey(instr.raw), and returns
//   the remembered PC + 2. reg[15] is not restored inside this function.
// DispatchThumb: returns the address of the next instruction to execute. It
//   first asks DecodeThumb for an ARM equivalent and forwards to it. Otherwise
//   it handles ADD (5), LDR (3), B (1), B (2), both halves of BL/BLX (1),
//   BLX (2), BX and POP inline. Anything else goes to HandlerStubThumb, which
//   throws.
//   NOTE(review): the POP comment says "2 minus the return value", but the
//   code returns `ret - 2`.
// Processor::UnregisterContext: removes the context pointer from `contexts`;
//   throws std::runtime_error if it is not registered.
// struct Interpreter / Interpreter::CreateExecutionContextImpl2: the factory
//   returns a raw `new InterpreterExecutionContext`.
//   NOTE(review): ownership of that pointer is not visible here - confirm.
// StepWithDispatchTable: executes one instruction and stores the resulting
//   address in PC.
//   - Thumb state goes through DispatchThumb.
//   - ARM state with cond != 0xf goes through the dispatch table.
//   - cond == 0xf is handled inline: BLX with immediate, CLREX (0xf57ff01f)
//     and the PLD bit pattern; any other encoding throws std::runtime_error.
//   - boost::context::detail::forced_unwind is rethrown untouched; any other
//     exception is logged with fmt::print and rethrown.
//   NOTE(review): the result of TranslateVirtualAddress is dereferenced
//   without a visible check.
//   NOTE(review): this handler prints without a stream argument, while
//   Interpreter::Run prints to stderr.
// Step: StepWithDispatchTable using default_dispatch_table.
// TriggerPreemption: returns early for process id 7 / thread id 2. Otherwise
//   it calls ClearExclusive, installs a callback_for_svc that calls
//   Reschedule(thread), and switches to the scheduler.
// Interpreter::Run: endless loop.
//   - Without a debugger attached it runs 10000 steps and then calls
//     TriggerPreemption.
//   - With a debugger attached, each instruction is preceded by these checks:
//     registered breakpoints, software traps (ARM 0xe7ffdefe, Thumb
//     (raw & 0xff00) == 0xbe00), trap_on_resume, and the pause/continue
//     handshake.
//   - It then single-steps and calls TriggerPreemption whenever
//     (cycle_count & 0xFFF) == 0.
//   NOTE(review): the pause/continue handshake uses empty busy-wait loops on
//   controller flags.
// CreateInterpreter: returns a std::make_unique-constructed interpreter.
// TemporaryInterpreterForJIT and helpers:
//   - SetParentCoroutine stores the coroutine pointer in the context.
//   - HandlerBranchForTemporaryInterpreter computes the target with
//     HandlerBranch, passes it to *ctx.coro, then returns it.
//   - HandlerSWIForTemporaryInterpreter passes reg[15] to *ctx.coro and
//     returns NextInstr(ctx).
//   - LookupHandlerForTemporaryInterpreter overrides only BL and SWI and
//     defers to LookupHandler for everything else.
//   - TemporaryInterpreterForJIT::Run steps forever with
//     temporary_interpreter_dispatch_table and has no exit condition of its
//     own.
// ReadPCFrom / CheckIsThumbFrom: return reg[15] and (cpsr.thumb == 1) of the
//   given context.
template static uint32_t ForwardThumbToARMMayReadPC(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Subtract 4 from the PC auto old_pc = ctx.cpu.reg[15]; ctx.cpu.reg[15] -= 4; // Forward the call to the ARM instruction: (void)(*interpreter_dispatch_table)[ARM::BuildDispatchTableKey(instr.raw)](ctx, instr); // Return current instruction + 2 for the next instruction return old_pc + 2; } template static uint32_t DispatchThumb(InterpreterExecutionContext& ctx, ARM::ThumbInstr instr) { if (auto decoded = DecodeThumb(instr); decoded.arm_equivalent) { uint32_t next_instr = decoded.may_read_pc ? ForwardThumbToARMMayReadPC(ctx, *decoded.arm_equivalent) : ForwardThumbToARM(ctx, *decoded.arm_equivalent); if (decoded.may_modify_pc) { return ctx.cpu.reg[15]; } else { return next_instr; } } else if (instr.opcode_upper5 == 0b10100) { // ADD (5) // This is slightly different from the ARM ADD since it ignores the lowest two PC bits before adding // TODO: This should be possible to achieve by modifying PC in place like we do for LDR (3) below auto result = ((ctx.cpu.reg[15] + 4) & 0xfffffffc); result += instr.immed_low_8 * 4; ctx.cpu.reg[instr.idx_rd_high] = result; return ctx.cpu.reg[15] + 2; } else if (instr.opcode_upper5 == 0b01001) { // LDR (3) // NOTE: There is a subtle difference between the Thumb encoding of this instruction and the equivalent ARM encoding! // In particular, this instruction ignores bit1 in the program counter, while on ARM having bit1 set causes unpredictable behavior. ARM::ARMInstr arm_instr; arm_instr.raw = (0b1110'0101'1001'1111ul << 16) | (instr.idx_rd_high << 12) | (instr.immed_low_8 << 2); // The Thumb encoding of this instruction ignores bit1 in the PC. Hence, let's emulate this here auto actual_pc = ctx.cpu.reg[15]; ctx.cpu.reg[15] &= ~0x2; // Furthermore, reads must return instr_offset+4 rather than instr_offset+8 (returned by FetchReg). Hence, subtract 4 here. 
ctx.cpu.reg[15] -= 4; (void)HandlerMemoryAccess(ctx, arm_instr); return actual_pc + 2; } else if (instr.opcode_upper4 == 0b1101) { // B (1) - conditional branch if (!EvalCond(ctx, instr.cond)) return ctx.cpu.reg[15] + 2; // Sign-extend offset uint32_t offset = instr.signed_immed_low_8; return ctx.cpu.reg[15] + 4 + (offset << 1); } else if (instr.opcode_upper3 == 0b111) { switch (ViewBitField<11, 2, uint16_t>(instr.raw)) { case 0b00: { // B (2) - unconditional Branch // Sign-extend offset uint32_t offset = instr.signed_immed_11.Value(); return ctx.cpu.reg[15] + 4 + (offset << 1); } case 0b10: // First instruction constituting a BL or BLX (1) sequence // NOTE: This implements a far jump by splitting the instruction into two. // The first instruction stores the first half of the target offset in the LR register, ctx.cpu.LR() = (ctx.cpu.PC() + 4) + (static_cast(instr.signed_immed_11) << 12); return ctx.cpu.PC() + 2; case 0b01: case 0b11: { // Second instruction constituting a BL or BLX (1) sequence // Combines the embedded offset with the LR value (initialized in the first instruction) and stores the result in PC. 
bool thumb = ViewBitField<12, 1, uint16_t>(instr.raw); uint32_t target = ctx.cpu.LR() + (instr.unsigned_immed_11 << 1); if (!thumb) target &= 0xFFFFFFFC; ctx.cpu.LR() = (ctx.cpu.PC() + 2) | 1; // Address of next instruction ctx.cpu.cpsr.thumb = thumb; ctx.RecordCall(ctx.cpu.PC(), target, ctx.cpu); ctx.cfl.Branch(ctx, "bl(x)", target); return target; } default: return 0; // TODO: UNREACHABLE } } else if (instr.opcode_upper9 == 0b0100'0111'1) { // BLX (2) - Branch with Link and Exchange auto idx_rm = instr.idx_rm | (instr.idx_rm_upperbit << 3); if (idx_rm == ARM::Regs::PC) return HandlerStubThumb(ctx, instr, "Unpredictable configuration"); // Link - make sure to save the LR in case it's used as the target specifier auto target = ctx.cpu.reg[idx_rm]; ctx.cpu.LR() = (ctx.cpu.reg[15] + 2) | 1; // Exchange and Branch ctx.cpu.cpsr.thumb = target & 1; ctx.RecordCall(ctx.cpu.PC(), target & 0xFFFFFFFE, ctx.cpu); ctx.cfl.Branch(ctx, "blx_t", target); return target & 0xFFFFFFFE; } else if (instr.opcode_upper9 == 0b0100'0111'0) { // BX - Branch with Exchange auto idx_rm = instr.idx_rm | (instr.idx_rm_upperbit << 3); if (idx_rm == ARM::Regs::PC) return HandlerStubThumb(ctx, instr, "Unimplemented configuration"); ctx.cfl.Return(ctx, "bx reg t"); // Exchange and Branch auto target = ctx.cpu.reg[idx_rm]; ctx.cpu.cpsr.thumb = target & 1; return target & 0xFFFFFFFE; } else if (instr.opcode_upper7 == 0b1011'110) { // POP - Pop Multiple Registers // NOTE: This modifies the PC if bit8 is set ARM::ARMInstr arm_instr; arm_instr.raw = (0b1110'1000'1011'1101ul << 16) | ((instr.raw & 0x100) << 7) // bit8 denotes whether to pop PC | instr.register_list; // auto ret = ForwardThumbToARM(ctx, arm_instr); // Call the handler directly to get the jump target address auto ret = HandlerLDM_STM(ctx, arm_instr); if (instr.raw & 0x100) { // If we loaded to PC, jump to the return value // ctx.cfl.Return(ctx, "pop_t"); return ret; } else { // If we didn't load to PC, jump to the next instruction (which // 
is 2 minus the return value since HandlerLDM_STM, being // an ARM handler, added 4 to the current PC) return ret - 2; } } else { return HandlerStubThumb(ctx, instr, ""); } } void Processor::UnregisterContext(ExecutionContext& context) { auto ctx_it = std::find(contexts.begin(), contexts.end(), &context); if (ctx_it == contexts.end()) { throw std::runtime_error("Attempted to unregister unknown ExecutionContext"); } contexts.erase(ctx_it); } struct Interpreter final : public ProcessorWithDefaultMemory { Interpreter(Setup& setup_) : ProcessorWithDefaultMemory(setup_) { } ~Interpreter() override = default; void Run(ExecutionContext& ctx, ProcessorController& controller, uint32_t process_id, uint32_t thread_id) override; InterpreterExecutionContext* CreateExecutionContextImpl2() override; }; InterpreterExecutionContext* Interpreter::CreateExecutionContextImpl2() { return new InterpreterExecutionContext(*this, setup); } template static void StepWithDispatchTable(ExecutionContext& ctx_) try { auto& ctx = static_cast(ctx_); // if (!ctx.backtrace.empty() && ctx.cpu.PC() == ctx.backtrace.back().source + (ctx.cpu.cpsr.thumb ? 2 : 4)) // ctx.backtrace.pop_back(); // TODO: Instead of translating the PC here over and over again, just have the OS allocate linear .text memory instead and just get a pointer to it using Memory::LookupContiguousMemoryBackedPage! uint32_t pc_phys = *ctx.TranslateVirtualAddress(ctx.cpu.PC()); // Fetch and process next instruction if (ctx.cpu.cpsr.thumb) { // TODO: Do this check when a jump happens! // if (pc_phys % 2) // throw std::runtime_error("Unaligned THUMB PC"); ARM::ThumbInstr instr = { ReadPhysicalMemory(ctx.setup->mem, pc_phys) }; ctx.cpu.PC() = DispatchThumb(ctx, instr); } else { // TODO: Do this check when a jump or thumb/arm mode switch happens! // if (pc_phys % 4) // throw std::runtime_error("Unaligned ARM PC"); // TODO: This is always an aligned read. We can considerably speed up this operation with that in mind! 
ARM::ARMInstr instr = { ReadPhysicalMemory(ctx.setup->mem, pc_phys) }; if (instr.cond != 0xf) { ctx.cpu.PC() = (*arm_dispatch_table)[ARM::BuildDispatchTableKey(instr.raw)](ctx_, instr); } else { // Handle unconditional instructions explicitly if (instr.opcode_prim == 0b1010 || instr.opcode_prim == 0b1011) { // Branch with Link and Exchange Link(ctx); ctx.cpu.cpsr.thumb = 1; // bit24 determines the halfword at which to resume execution uint32_t target = ctx.cpu.PC() + 8 + ((4 * instr.branch_target) | ((instr.raw & 0x1000000) >> 23)); ctx.RecordCall(ctx.cpu.PC(), target, ctx.cpu); ctx.cfl.Branch(ctx, "bx_0xf", target); ctx.cpu.PC() = target; } else if (instr.raw == 0xf57ff01f) { // CLREX ClearExclusive(ctx); ctx.cpu.PC() = NextInstr(ctx); } else if ((instr.raw & 0b1111'1101'0111'0000'1111'0000'0000'0000) == 0b1111'0101'0101'0000'1111'0000'0000'0000) { // pld variants, not sure what this does specifically, but we probably don't need to implement it. ctx.cpu.PC() = NextInstr(ctx); } else { std::stringstream ss; ss << std::hex << std::setw(8) << std::setfill('0') << instr.raw; throw std::runtime_error("Unknown unconditional instruction 0x" + ss.str()); } } } } catch (const boost::context::detail::forced_unwind&) { throw; } catch (...) { fmt::print( "Exception thrown while running interpreter at PC {:#x} (process id {})\n", static_cast(ctx_).cpu.PC(), static_cast(ctx_).os->active_thread->GetParentProcess().GetId()); throw; } void Step(ExecutionContext& ctx) { StepWithDispatchTable<&default_dispatch_table>(ctx); } static void TriggerPreemption(InterpreterExecutionContext& ctx) { // NS shared font loading thread. 
This may not be preempted, otherwise we don't finish loading the font by the time applications want to access it if (ctx.setup->os->active_thread->GetParentProcess().GetId() == 7 && ctx.setup->os->active_thread->GetId() == 2) { return; } // Reset monitor address to avoid thread tearing issues where an // LDREX-STREX pair is interrupted by preemption ClearExclusive(ctx); // Preempt the current thread every now and then to make sure we don't end // up stuck in infinite loops waiting for other threads to do something. // NOTE: Technically, the CPU core applications generally run on does not // use preemptive scheduling, however threads on that core regardless // may be preempted under certain circumstances. Hence, this isn't as // much of a hack as it might seem to be. // NOTE: This seems to be commonly used to spinlock for HID to update // shared memory fields // TODO: This might cause issues with some of our HLE code. Make sure to support ldrex/strex in HLE code to prevent race conditions! ctx.os->active_thread->callback_for_svc = [](std::shared_ptr thread) { thread->GetOS().Reschedule(thread); }; ctx.os->SwitchToSchedulerFromThread(*ctx.os->active_thread); } void Interpreter::Run(ExecutionContext& ctx_, ProcessorController& controller, uint32_t process_id, uint32_t thread_id) try { auto& ctx = static_cast(ctx_); ctx.controller = &controller; for (;;) { if (!ctx.debugger_attached) { // Run a bunch of instructions at a time, then check the debugging state again for (int i = 0; i < 10000; ++i) { // for (int i = 0; i < ctx.os->active_thread->GetParentProcess().GetId() == 17 ? 
10 : 10000; ++i) { ++ctx.cpu.cycle_count; StepWithDispatchTable<&default_dispatch_table>(ctx); } TriggerPreemption(ctx); } else { for (auto& bp : ctx.breakpoints) { if (bp.address == ctx.cpu.PC()) { std::cerr << "INTERPRETER HIT BREAKPOINT" << std::endl; // Notify debugger about the breakpoint controller.NotifyBreakpoint(process_id, thread_id); controller.paused = true; break; } } // Check software breakpoints written by GDB // TODO: This adds one redundant memory read per iteration... instead change Step() to ProcessInstruction()! if (!ctx.cpu.cpsr.thumb) { ARM::ARMInstr instr = { ctx.ReadVirtualMemory(ctx.cpu.PC()) }; // "Trap" if (instr.raw == 0xe7ffdefe) { controller.NotifyBreakpoint(process_id, thread_id); controller.paused = true; } } else { // "Trap" ARM::ThumbInstr instr = { ctx.ReadVirtualMemory(ctx.cpu.PC()) }; if ((instr.raw & 0xff00) == 0xbe00) { controller.NotifyBreakpoint(process_id, thread_id); controller.paused = true; } } if (ctx.trap_on_resume) { ctx.trap_on_resume = false; ctx.controller->NotifyBreakpoint(ctx.os->active_thread->GetParentProcess().GetId(), ctx.os->active_thread->GetId()); ctx.controller->paused = true; } if (controller.ShouldPause(process_id, thread_id)) { // Set paused and wait for acknowledgement controller.paused = true; while (controller.request_pause) { } } if (controller.paused) { // Wait until we are requested to continue, then unpause, then wait until the unpausing has been noticed while (!controller.request_continue) { } controller.paused = false; while (controller.request_continue) { } } // Single step StepWithDispatchTable<&default_dispatch_table>(ctx); ++ctx.cpu.cycle_count; if ((ctx.cpu.cycle_count & 0xFFF) == 0) { TriggerPreemption(ctx); } } } } catch (const boost::context::detail::forced_unwind&) { throw; } catch (...) 
{ fmt::print(stderr, "Exception thrown while running interpreter at PC {:#x}\n", static_cast(ctx_).cpu.PC()); throw; } std::unique_ptr CreateInterpreter(Setup& setup) { return std::make_unique(setup); } /** * Fallback interpreter usable by a JIT while translating binary code in the background * * This interpreter works just like the usual one, with the difference that on a branch it will yield control back to the given coroutine, passing it the branch target address */ struct TemporaryInterpreterForJIT final : public ProcessorWithDefaultMemory { TemporaryInterpreterForJIT(Setup& setup_) : ProcessorWithDefaultMemory(setup_) { } ~TemporaryInterpreterForJIT() override = default; void Run(ExecutionContext& ctx, ProcessorController& controller, uint32_t process_id, uint32_t thread_id) override; InterpreterExecutionContext* CreateExecutionContextImpl2() override; }; // TODO: Come up with a cleaner interface... void SetParentCoroutine(ExecutionContext& ctx, boost::coroutines2::coroutine::push_type& coro) { static_cast(ctx).coro = &coro; } InterpreterExecutionContext* TemporaryInterpreterForJIT::CreateExecutionContextImpl2() { return new InterpreterExecutionContext(*this, setup); } template static uint32_t HandlerBranchForTemporaryInterpreter(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Jumping to a function; notify JIT about this auto target = HandlerBranch(ctx, instr); // TODO: Fix for HostThreadBasedThreadControl (*ctx.coro)(target); // TODO: What if JIT decides to stop interpreting here? Upon resume, will assign this target to PC... 
return target; } static uint32_t HandlerSWIForTemporaryInterpreter(InterpreterExecutionContext& ctx, ARM::ARMInstr instr) { // Switch back to the JIT since OS uses the JIT's ExecutionContext when processing system calls // TODO: Fix for HostThreadBasedThreadControl (*ctx.coro)(ctx.cpu.reg[15]); return NextInstr(ctx); } InterpreterARMHandlerForJIT LookupHandlerForTemporaryInterpreter(ARM::Instr instr) { // TODO: Enable other instructions, but take care that the branch target is always a *function* rather than just a basic block switch (instr) { // TODO: Consider supporting branches through these! // case ARM::Instr::AND: return Wrap; // case ARM::Instr::EOR: return Wrap; // case ARM::Instr::SUB: return Wrap; // case ARM::Instr::RSB: return Wrap; // case ARM::Instr::ADD: return Wrap; // case ARM::Instr::ADC: return Wrap; // case ARM::Instr::SBC: return Wrap; // case ARM::Instr::RSC: return Wrap; // case ARM::Instr::TST: return Wrap; // case ARM::Instr::TEQ: return Wrap; // case ARM::Instr::CMP: return Wrap; // case ARM::Instr::CMN: return Wrap; // case ARM::Instr::ORR: return Wrap; // case ARM::Instr::MOV: return Wrap; // case ARM::Instr::BIC: return Wrap; // case ARM::Instr::MVN: return Wrap; // case ARM::Instr::B: return Wrap>; case ARM::Instr::BL: return Wrap>; // case ARM::Instr::BX: return Wrap>; // case ARM::Instr::BLX: return Wrap>; // TODO: Support branches through loading to PC! 
// case ARM::Instr::LDR: return Wrap>; // case ARM::Instr::LDRB: return Wrap>; // case ARM::Instr::LDRH: return Wrap>; // case ARM::Instr::LDRSH: return Wrap>; // case ARM::Instr::LDRSB: return Wrap>; // case ARM::Instr::LDRD: return Wrap>; // case ARM::Instr::LDM: return Wrap>; case ARM::Instr::SWI: return Wrap; default: return LookupHandler(instr); } } static const auto temporary_interpreter_dispatch_table = GenerateDispatchTable(LookupHandlerForTemporaryInterpreter, Wrap); void TemporaryInterpreterForJIT::Run(ExecutionContext& ctx_, ProcessorController& controller, uint32_t process_id, uint32_t thread_id) { auto& ctx = static_cast(ctx_); ctx.controller = &controller; for (;;) { // std::cerr << "TemporaryInterpreter running at 0x" << ctx.cpu.reg[15] << std::endl; StepWithDispatchTable<&temporary_interpreter_dispatch_table>(ctx); } } std::unique_ptr CreateTemporaryInterpreterForJIT(Setup& setup) { return std::make_unique(setup); } // TODO: Better interface uint32_t ReadPCFrom(ExecutionContext& ctx) { return static_cast(ctx).cpu.reg[15]; } // TODO: Better interface bool CheckIsThumbFrom(ExecutionContext& ctx) { return (static_cast(ctx).cpu.cpsr.thumb == 1); } } // namespace Interpreter