From 117a68b7f22d83c064204f988fa6379e9f78e367 Mon Sep 17 00:00:00 2001 From: OBarronCS <55004530+OBarronCS@users.noreply.github.com> Date: Thu, 8 Aug 2024 07:00:01 -0700 Subject: [PATCH] Annotate all load operations Arm, AArch64, RISCV, and MIPS (#2309) * _generate_load_annotator * Parent class function for load-instruction annotations. RISCV loads implemented and tested. SPARC load/store instructions noted * Get ARM32 load and store instructions * Add all AArch64 loads and stores * MIPS memory address resolver * AArch64 memory resolver * AArch64 shift operation in memory operands * Arm resolve memory operands and PC special case * Lint * comment * rebase and lint * comment corrections * Fill in arm function maps * extract mips load instructions * lint * Remove unnecessary parameter to enhancement telescope function * Implement signed loads * Now with load code moved to parent, refactor in x86 class * lint * aarch64 read size fix * arm thumb mode pc + 4 * read thumb bit from emu when needed * lint * rebase * lint * rebase * Add load annotator to MIPS * lint * fix last aarch64 register thing * minor fixes --- pwndbg/gdblib/disasm/aarch64.py | 177 +++++++++++++++++++++++++++----- pwndbg/gdblib/disasm/arch.py | 85 +++++++++++++-- pwndbg/gdblib/disasm/arm.py | 119 ++++++++++++++++++++- pwndbg/gdblib/disasm/mips.py | 64 +++++++++--- pwndbg/gdblib/disasm/riscv.py | 99 ++++++++++++++++++ pwndbg/gdblib/disasm/sparc.py | 20 ++++ pwndbg/gdblib/disasm/x86.py | 51 +++------ 7 files changed, 527 insertions(+), 88 deletions(-) diff --git a/pwndbg/gdblib/disasm/aarch64.py b/pwndbg/gdblib/disasm/aarch64.py index cdb5bc9c6..e30a61cc1 100644 --- a/pwndbg/gdblib/disasm/aarch64.py +++ b/pwndbg/gdblib/disasm/aarch64.py @@ -20,6 +20,83 @@ from pwndbg.gdblib.disasm.instruction import InstructionCondition from pwndbg.gdblib.disasm.instruction import PwndbgInstruction from pwndbg.gdblib.disasm.instruction import boolean_to_instruction_condition +# Negative size indicates signed read +# 
None indicates the read size depends on the target register +AARCH64_SINGLE_LOAD_INSTRUCTIONS: Dict[int, int | None] = { + ARM64_INS_LDRB: 1, + ARM64_INS_LDURB: 1, + ARM64_INS_LDRSB: -1, + ARM64_INS_LDURSB: -1, + ARM64_INS_LDRH: 2, + ARM64_INS_LDURH: 2, + ARM64_INS_LDRSH: -2, + ARM64_INS_LDURSH: -2, + ARM64_INS_LDURSW: -4, + ARM64_INS_LDRSW: -4, + ARM64_INS_LDUR: None, + ARM64_INS_LDR: None, + ARM64_INS_LDTRB: 1, + ARM64_INS_LDTRSB: -1, + ARM64_INS_LDTRH: 2, + ARM64_INS_LDTRSH: -2, + ARM64_INS_LDTRSW: -4, + ARM64_INS_LDTR: None, + ARM64_INS_LDXRB: 1, + ARM64_INS_LDXRH: 2, + ARM64_INS_LDXR: None, + ARM64_INS_LDARB: 1, + ARM64_INS_LDARH: 2, + ARM64_INS_LDAR: None, +} + +AARCH64_SINGLE_STORE_INSTRUCTIONS: Dict[int, int | None] = { + ARM64_INS_STRB: 1, + ARM64_INS_STURB: 1, + ARM64_INS_STRH: 2, + ARM64_INS_STURH: 2, + ARM64_INS_STUR: None, + ARM64_INS_STR: None, + # Store Register (unprivileged) + ARM64_INS_STTRB: 1, + ARM64_INS_STTRH: 2, + ARM64_INS_STTR: None, + # Store Exclusive + ARM64_INS_STXRB: 1, + ARM64_INS_STXRH: 2, + ARM64_INS_STXR: None, + # Store-Release + ARM64_INS_STLRB: 1, + ARM64_INS_STLRH: 2, + ARM64_INS_STLR: None, + # Store-Release Exclusive + ARM64_INS_STLXRB: 1, + ARM64_INS_STLXRH: 2, + ARM64_INS_STLXR: None, +} + +# Parameters to each function: (value, shift_amt, bit_width) +AARCH64_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = { + ARM64_SFT_LSL: bit_math.logical_shift_left, + ARM64_SFT_LSR: bit_math.logical_shift_right, + ARM64_SFT_ASR: bit_math.arithmetic_shift_right, + ARM64_SFT_ROR: bit_math.rotate_right, +} + + +# These are "Extend" operations - https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912 +# They take in a number, extract a byte, halfword, or word, +# and perform a zero- or sign-extend operation. 
+AARCH64_EXTEND_MAP: Dict[int, Callable[[int], int]] = { + ARM64_EXT_UXTB: lambda x: x & ((1 << 8) - 1), + ARM64_EXT_UXTH: lambda x: x & ((1 << 16) - 1), + ARM64_EXT_UXTW: lambda x: x & ((1 << 32) - 1), + ARM64_EXT_UXTX: lambda x: x, # UXTX has no effect. It extracts 64-bits from a 64-bit register. + ARM64_EXT_SXTB: lambda x: bit_math.to_signed(x, 8), + ARM64_EXT_SXTH: lambda x: bit_math.to_signed(x, 16), + ARM64_EXT_SXTW: lambda x: bit_math.to_signed(x, 32), + ARM64_EXT_SXTX: lambda x: bit_math.to_signed(x, 64), +} + def resolve_condition(condition: int, cpsr: int) -> InstructionCondition: """ @@ -56,29 +133,6 @@ def resolve_condition(condition: int, cpsr: int) -> InstructionCondition: return InstructionCondition.TRUE if condition else InstructionCondition.FALSE -# Parameters to each function: (value, shift_amt, bit_width) -AARCH64_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = { - ARM64_SFT_LSL: bit_math.logical_shift_left, - ARM64_SFT_LSR: bit_math.logical_shift_right, - ARM64_SFT_ASR: bit_math.arithmetic_shift_right, - ARM64_SFT_ROR: bit_math.rotate_right, -} - -# These are "Extend" operations - https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912 -# They take in a number, extract a byte, halfword, or word, -# and perform a zero- or sign-extend operation. -AARCH64_EXTEND_MAP: Dict[int, Callable[[int], int]] = { - ARM64_EXT_UXTB: lambda x: x & ((1 << 8) - 1), - ARM64_EXT_UXTH: lambda x: x & ((1 << 16) - 1), - ARM64_EXT_UXTW: lambda x: x & ((1 << 32) - 1), - ARM64_EXT_UXTX: lambda x: x, # UXTX has no effect. It extracts 64-bits from a 64-bit register. 
- ARM64_EXT_SXTB: lambda x: bit_math.to_signed(x, 8), - ARM64_EXT_SXTH: lambda x: bit_math.to_signed(x, 16), - ARM64_EXT_SXTW: lambda x: bit_math.to_signed(x, 32), - ARM64_EXT_SXTX: lambda x: bit_math.to_signed(x, 64), -} - - class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): def __init__(self, architecture: str) -> None: super().__init__(architecture) @@ -90,8 +144,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): ARM64_INS_ADR: self._common_generic_register_destination, # ADRP ARM64_INS_ADRP: self._common_generic_register_destination, - # LDR - ARM64_INS_LDR: self._common_generic_register_destination, # ADD ARM64_INS_ADD: self._common_generic_register_destination, # SUB @@ -108,6 +160,26 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): ARM64_INS_CCMN: self._common_cmp_annotator_builder("cpsr", ""), } + @override + def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None: + # Dispatch to the correct handler + if instruction.id in AARCH64_SINGLE_LOAD_INSTRUCTIONS: + target_reg_size = self._register_width(instruction, instruction.operands[0]) // 8 + read_size = AARCH64_SINGLE_LOAD_INSTRUCTIONS[instruction.id] or target_reg_size + + self._common_load_annotator( + instruction, + emu, + instruction.operands[1].before_value, + abs(read_size), + read_size < 0, + target_reg_size, + instruction.operands[0].str, + instruction.operands[1].str, + ) + else: + self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu) + @override def _condition( self, instruction: PwndbgInstruction, emu: Emulator @@ -164,9 +236,58 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): return super()._resolve_target(instruction, emu, call) @override - def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None: - # Dispatch to the correct handler - self.annotation_handlers.get(instruction.id, lambda *a: 
None)(instruction, emu) + def _parse_memory( + self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator + ) -> int | None: + """ + Parse the `Arm64OpMem` Capstone object to determine the concrete memory address used. + + Three types of AArch64 memory operands: + 1. Register base with optional immediate offset + Examples: + ldrb w3, [x2] + str x1, [x2, #0xb58] + ldr x4,[x3], 4 + 2. Register + another register with an optional shift + Examples: + ldrb w1, [x9, x2] + str x1, [x2, x0, lsl #3] + 3. Register + 32-bit register extended and shifted. + The shift in this case is implicitly a LSL + Examples: + ldr x1, [x2, w22, UXTW #3] + + """ + + target = 0 + + # All memory operands have `base` defined + base = self._read_register(instruction, op.mem.base, emu) + if base is None: + return None + target = base + op.mem.disp + + # If there is an index register + if op.mem.index != 0: + index = self._read_register(instruction, op.mem.index, emu) + if index is None: + return None + + # Optionally apply an extend to the index register + if op.cs_op.ext != 0: + index = AARCH64_EXTEND_MAP[op.cs_op.ext](index) + + # Optionally apply shift to the index register + # This handles shifts in the extend operation as well: + # As in the case of `ldr x1, [x2, w22, UXTW #3]`, + # Capstone will automatically make the shift a LSL and set the value to 3 + if op.cs_op.shift.type != 0: + # The form of instructions with a shift always apply the shift to a 64-bit value + index = AARCH64_BIT_SHIFT_MAP[op.cs_op.shift.type](index, op.cs_op.shift.value, 64) + + target += index + + return target def _register_width(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> int: return 32 if instruction.cs_insn.reg_name(op.reg)[0] == "w" else 64 diff --git a/pwndbg/gdblib/disasm/arch.py b/pwndbg/gdblib/disasm/arch.py index 58aa03d68..709009571 100644 --- a/pwndbg/gdblib/disasm/arch.py +++ b/pwndbg/gdblib/disasm/arch.py @@ -10,6 +10,7 @@ from capstone import * # noqa: F403 import 
pwndbg.chain import pwndbg.color.context as C import pwndbg.color.memory as MemoryColor +import pwndbg.color.message as MessageColor import pwndbg.color.syntax_highlight as H import pwndbg.enhance import pwndbg.gdblib.memory @@ -18,6 +19,7 @@ import pwndbg.gdblib.symbol import pwndbg.gdblib.typeinfo import pwndbg.gdblib.vmmap import pwndbg.lib.config +import pwndbg.lib.disasm.helpers as bit_math from pwndbg.emu.emulator import Emulator from pwndbg.gdblib.disasm.instruction import FORWARD_JUMP_GROUP from pwndbg.gdblib.disasm.instruction import EnhancedOperand @@ -394,10 +396,9 @@ class DisassemblyAssistant: address: int, size: int, instruction: PwndbgInstruction, - operand: EnhancedOperand, emu: Emulator, ) -> int | None: - address_list = self._telescope(address, 1, instruction, operand, emu, read_size=size) + address_list = self._telescope(address, 1, instruction, emu, read_size=size) if len(address_list) >= 2: return address_list[1] @@ -426,7 +427,7 @@ class DisassemblyAssistant: elif operand.type == CS_OP_MEM: # Assume that we are reading ptrsize - subclasses should override this function # to provide a more specific value if needed - self._read_memory(value, pwndbg.gdblib.arch.ptrsize, instruction, operand, emu) + self._read_memory(value, pwndbg.gdblib.arch.ptrsize, instruction, emu) return None @@ -435,7 +436,6 @@ class DisassemblyAssistant: address: int, limit: int, instruction: PwndbgInstruction, - operand: EnhancedOperand, emu: Emulator, read_size: int = None, ) -> List[int]: @@ -470,7 +470,7 @@ class DisassemblyAssistant: else: return pwndbg.chain.get(address, limit=limit) - elif not can_read_process_state or operand.type == CS_OP_IMM: + else: # If the target address is in a non-writeable map, we can pretty safely telescope # This is best-effort to give a better experience @@ -746,7 +746,6 @@ class DisassemblyAssistant: left.after_value, TELESCOPE_DEPTH + 1, instruction, - left, emu, read_size=pwndbg.gdblib.arch.ptrsize, ) @@ -801,5 +800,79 @@ class 
DisassemblyAssistant: return handler + def _common_load_annotator( + self, + instruction: PwndbgInstruction, + emu: Emulator, + address: int | None, + read_size: int, + signed: bool, + target_size: int, + dest_str: str, + source_str: str, + ) -> None: + """ + This function annotates load instructions - moving data from memory into a register. + + These instructions read `read_size` bytes from memory into a register. + + `signed`: whether or not we are loading a signed value from memory + `target_size`: the size of the register in bytes - relevant for sign-extension + `dest_str`: a string representing the destination register ('rax') + `source_str`: a string representing the source address ('[0x7fffffffe138]') + """ + + if address is None: + return + + # There are many cases we need to consider when we are loading a value from memory + # Were we able to reason about the memory address, and dereference it? + # Does the resolved memory address actually point into memory? + # If the target register size is larger than the read size, then do we need sign-extension? + + # If the address is not mapped, we segfaulted + if not pwndbg.gdblib.memory.peek(address): + instruction.annotation = MessageColor.error( + f"" + ) + else: + # In this branch, it is assumed that the address IS in a mapped page + TELESCOPE_DEPTH = max(1, int(pwndbg.config.disasm_telescope_depth)) + + telescope_addresses = self._telescope( + address, + TELESCOPE_DEPTH, + instruction, + emu, + read_size=read_size, + ) + + if len(telescope_addresses) == 1: + # If telescope returned only 1 address (and we already know the address is in a mapped page) + # it means we couldn't reason about the dereferenced memory. + # In this case, simply display the address + + # As an example, this path is taken for the following case: + # mov rdi, qword ptr [rip + 0x17d40] where the resolved memory address is in writeable memory, + # and we are not emulating. 
This means we cannot safely dereference if PC is not at the current instruction address, + # because the memory address could have been written to by the time the instruction executes + telescope_print = None + else: + if signed and read_size != target_size and len(telescope_addresses) == 2: + # We sign extend the value, then convert it back to the unsigned bit representation + final_value = bit_math.to_signed(telescope_addresses[1], read_size * 8) & ( + (1 << (target_size * 8)) - 1 + ) + # If it's a signed read that required extension, it will just be a number with no special symbol/color needed + telescope_print = hex(final_value) + else: + # Start showing at dereferenced address, hence the [1:] + telescope_print = f"{self._telescope_format_list(telescope_addresses[1:], TELESCOPE_DEPTH, emu)}" + + instruction.annotation = f"{dest_str}, {source_str}" + + if telescope_print is not None: + instruction.annotation += f" => {telescope_print}" + generic_assistant = DisassemblyAssistant(None) diff --git a/pwndbg/gdblib/disasm/arm.py b/pwndbg/gdblib/disasm/arm.py index 7100511ad..4771a02dc 100644 --- a/pwndbg/gdblib/disasm/arm.py +++ b/pwndbg/gdblib/disasm/arm.py @@ -5,6 +5,7 @@ from typing import Dict from capstone import * # noqa: F403 from capstone.arm import * # noqa: F403 +from pwnlib.util.misc import align_down from typing_extensions import override import pwndbg.gdblib.arch @@ -13,6 +14,7 @@ import pwndbg.gdblib.memory import pwndbg.gdblib.regs import pwndbg.lib.disasm.helpers as bit_math from pwndbg.emu.emulator import Emulator +from pwndbg.gdblib.arch import read_thumb_bit as process_read_thumb_bit from pwndbg.gdblib.disasm.instruction import EnhancedOperand from pwndbg.gdblib.disasm.instruction import InstructionCondition from pwndbg.gdblib.disasm.instruction import PwndbgInstruction @@ -25,6 +27,34 @@ ARM_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = { ARM_SFT_ROR: bit_math.rotate_right, } +ARM_SINGLE_LOAD_INSTRUCTIONS = { + ARM_INS_LDRB: 1, + 
ARM_INS_LDRSB: -1, + ARM_INS_LDRH: 2, + ARM_INS_LDRSH: -2, + ARM_INS_LDR: 4, + ARM_INS_LDRBT: 1, + ARM_INS_LDRSBT: -1, + ARM_INS_LDRHT: 2, + ARM_INS_LDRSHT: -2, + ARM_INS_LDRT: 4, + ARM_INS_LDREXB: 1, + ARM_INS_LDREXH: 2, + ARM_INS_LDREX: 4, +} + +ARM_SINGLE_STORE_INSTRUCTIONS = { + ARM_INS_STRB: 1, + ARM_INS_STRH: 2, + ARM_INS_STR: 4, + ARM_INS_STRBT: 1, + ARM_INS_STRHT: 2, + ARM_INS_STRT: 4, + ARM_INS_STREXB: 1, + ARM_INS_STREXH: 2, + ARM_INS_STREX: 4, +} + class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): def __init__(self, architecture: str) -> None: @@ -43,8 +73,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): @override def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None: - # Dispatch to the correct handler - self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu) + if instruction.id in ARM_SINGLE_LOAD_INSTRUCTIONS: + read_size = ARM_SINGLE_LOAD_INSTRUCTIONS[instruction.id] + self._common_load_annotator( + instruction, + emu, + instruction.operands[1].before_value, + abs(read_size), + read_size < 0, + 4, + instruction.operands[0].str, + instruction.operands[1].str, + ) + else: + self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu) @override def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition: @@ -99,8 +141,8 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): target = target & ~1 return target - @override - def _memory_string(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> str: + # Currently not used + def _memory_string_old(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> str: parts = [] if op.mem.base != 0: @@ -116,10 +158,79 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): return f"[{(', '.join(parts))}]" + def read_thumb_bit(self, instruction: PwndbgInstruction, emu: Emulator) -> int | None: 
+ if emu: + return emu.read_thumb_bit() + elif self.can_reason_about_process_state(instruction): + # Read the Thumb bit directly from the process flag register if we can + return process_read_thumb_bit() + else: + return 0 + @override def _immediate_string(self, instruction, operand): return "#" + super()._immediate_string(instruction, operand) + @override + def _read_register( + self, instruction: PwndbgInstruction, operand_id: int, emu: Emulator + ) -> int | None: + # When `pc` is referenced in an operand (typically in a memory operand), the value it takes on + # is `pc_at_instruction + 8`. In Thumb mode, you only add 4 to the instruction address. + if operand_id == ARM_REG_PC: + return instruction.address + (4 if self.read_thumb_bit(instruction, emu) else 8) + + return super()._read_register(instruction, operand_id, emu) + + @override + def _parse_memory( + self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator + ) -> int | None: + """ + Parse the `ArmOpMem` Capstone object to determine the concrete memory address used. + + Types of memory operands: + [Rn] + [Rn, #imm] + [Rn, Rm] + [Rn, Rm, #imm] + + Capstone represents the object a bit differently than AArch64 to align with the underlying architecture of Arm. 
+ + This representation will change in Capstone 6: + https://github.com/capstone-engine/capstone/issues/2281 + https://github.com/capstone-engine/capstone/pull/1949 + """ + + target = 0 + + # All memory operands have `base` defined + base = self._read_register(instruction, op.mem.base, emu) + if base is None: + return None + + if op.mem.base == ARM_REG_PC: + # The PC as the base register is a special case - it will align the address to a word (32-bit) boundary + # Explanation: https://stackoverflow.com/a/29588678 + # See "Operation" at the bottom of https://developer.arm.com/documentation/ddi0597/2024-03/Base-Instructions/LDR--literal---Load-Register--literal-- + base = align_down(4, base) + + target = base + op.mem.disp + + # If there is an index register + if op.mem.index != 0: + index = self._read_register(instruction, op.mem.index, emu) + if index is None: + return None + + # Optionally apply shift to the index register + if op.cs_op.shift.type != 0: + index = ARM_BIT_SHIFT_MAP[op.cs_op.shift.type](index, op.cs_op.shift.value, 32) + + target += index * (-1 if op.cs_op.subtracted else 1) + + return target + @override def _parse_register( self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator diff --git a/pwndbg/gdblib/disasm/mips.py b/pwndbg/gdblib/disasm/mips.py index 9be5fce0a..d923b0d79 100644 --- a/pwndbg/gdblib/disasm/mips.py +++ b/pwndbg/gdblib/disasm/mips.py @@ -75,23 +75,9 @@ MIPS_SIMPLE_DESTINATION_INSTRUCTIONS = { MIPS_INS_DCLZ, MIPS_INS_DSUB, MIPS_INS_DSUBU, - MIPS_INS_LB, - MIPS_INS_LBU, - MIPS_INS_LD, - MIPS_INS_LDL, - MIPS_INS_LDPC, - MIPS_INS_LDR, - MIPS_INS_LH, - MIPS_INS_LHU, MIPS_INS_LSA, MIPS_INS_DLSA, MIPS_INS_LUI, - MIPS_INS_LW, - MIPS_INS_LWL, - MIPS_INS_LWPC, - MIPS_INS_LWR, - MIPS_INS_LWU, - MIPS_INS_LWUPC, MIPS_INS_MFHI, MIPS_INS_MFLO, MIPS_INS_SEB, @@ -105,6 +91,26 @@ MIPS_SIMPLE_DESTINATION_INSTRUCTIONS = { MIPS_INS_SLTI, MIPS_INS_SLTIU, MIPS_INS_SLTU, + # Rare - unaligned read - have complex loading logic + 
MIPS_INS_LDL, + MIPS_INS_LDR, + # Rare - partial load on portions of address + MIPS_INS_LWL, + MIPS_INS_LWR, +} + +# All MIPS load instructions +MIPS_LOAD_INSTRUCTIONS = { + MIPS_INS_LB: 1, + MIPS_INS_LBU: 1, + MIPS_INS_LH: 2, + MIPS_INS_LHU: 2, + MIPS_INS_LW: 4, + MIPS_INS_LWU: 4, + MIPS_INS_LWPC: 4, + MIPS_INS_LWUPC: 4, + MIPS_INS_LD: 8, + MIPS_INS_LDPC: 8, } @@ -115,7 +121,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): @override def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None: - if instruction.id in MIPS_SIMPLE_DESTINATION_INSTRUCTIONS: + if instruction.id in MIPS_LOAD_INSTRUCTIONS: + read_size = MIPS_LOAD_INSTRUCTIONS[instruction.id] + + self._common_load_annotator( + instruction, + emu, + instruction.operands[1].before_value, + abs(read_size), + read_size < 0, + pwndbg.gdblib.arch.ptrsize, + instruction.operands[0].str, + instruction.operands[1].str, + ) + elif instruction.id in MIPS_SIMPLE_DESTINATION_INSTRUCTIONS: self._common_generic_register_destination(instruction, emu) @override @@ -152,5 +171,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): return super()._resolve_target(instruction, emu, call) + @override + def _parse_memory( + self, + instruction: PwndbgInstruction, + op: pwndbg.gdblib.disasm.arch.EnhancedOperand, + emu: Emulator, + ) -> int | None: + """ + Parse the `MipsOpMem` Capstone object to determine the concrete memory address used. 
+ """ + base = self._read_register(instruction, op.mem.base, emu) + if base is None: + return None + return base + op.mem.disp + assistant = DisassemblyAssistant("mips") diff --git a/pwndbg/gdblib/disasm/riscv.py b/pwndbg/gdblib/disasm/riscv.py index a85488565..ffcc0825a 100644 --- a/pwndbg/gdblib/disasm/riscv.py +++ b/pwndbg/gdblib/disasm/riscv.py @@ -4,20 +4,107 @@ from capstone import * # noqa: F403 from capstone.riscv import * # noqa: F403 from typing_extensions import override +import pwndbg.color.memory as MemoryColor import pwndbg.gdblib.arch import pwndbg.gdblib.disasm.arch import pwndbg.gdblib.regs import pwndbg.lib.disasm.helpers as bit_math from pwndbg.emu.emulator import Emulator +from pwndbg.gdblib.disasm.instruction import EnhancedOperand from pwndbg.gdblib.disasm.instruction import InstructionCondition from pwndbg.gdblib.disasm.instruction import PwndbgInstruction +RISCV_LOAD_INSTRUCTIONS = { + # Sign-extend loads + RISCV_INS_LB: -1, + RISCV_INS_LH: -2, + RISCV_INS_LW: -4, + # Zero-extend loads + RISCV_INS_LBU: 1, + RISCV_INS_LHU: 2, + RISCV_INS_LWU: 4, + RISCV_INS_LD: 8, +} + +# Due to a bug in Capstone, these instructions have incorrect operands to represent a memory address. 
+# So we temporarily separate them to handle them differently +# This will be fixed in Capstone 6 - https://github.com/capstone-engine/capstone/pull/2393 +# TODO: remove this when updating to Capstone 6 +RISCV_COMPRESSED_LOAD_INSTRUCTIONS = {RISCV_INS_C_LW: -4, RISCV_INS_C_LD: 8, RISCV_INS_C_LDSP: 8} + +RISCV_STORE_INSTRUCTIONS = { + RISCV_INS_SB: 1, + RISCV_INS_SH: 2, + RISCV_INS_SW: 4, + RISCV_INS_SD: 8, +} + +# TODO: remove this when updating to Capstone 6 +RISCV_COMPRESSED_STORE_INSTRUCTIONS = { + RISCV_INS_C_SW: 4, + RISCV_INS_C_SD: 8, +} + class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): def __init__(self, architecture) -> None: super().__init__(architecture) self.architecture = architecture + @override + def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None: + if instruction.id in RISCV_LOAD_INSTRUCTIONS: + read_size = RISCV_LOAD_INSTRUCTIONS[instruction.id] + self._common_load_annotator( + instruction, + emu, + instruction.operands[1].before_value, + abs(read_size), + read_size < 0, + pwndbg.gdblib.arch.ptrsize, + instruction.operands[0].str, + instruction.operands[1].str, + ) + + # TODO: remove this when updating to Capstone 6 + if instruction.id in RISCV_COMPRESSED_LOAD_INSTRUCTIONS: + # We need to manually resolve this now since Capstone doesn't properly represent + # memory operands for compressed instructions. 
+ address = self._resolve_compressed_target_addr(instruction, emu) + if address is not None: + read_size = RISCV_COMPRESSED_LOAD_INSTRUCTIONS[instruction.id] + + dest_str = f"[{MemoryColor.get_address_or_symbol(address)}]" + + self._common_load_annotator( + instruction, + emu, + address, + abs(read_size), + read_size < 0, + pwndbg.gdblib.arch.ptrsize, + instruction.operands[0].str, + dest_str, + ) + + return super()._set_annotation_string(instruction, emu) + + def _resolve_compressed_target_addr( + self, instruction: PwndbgInstruction, emu: Emulator + ) -> int | None: + """ + Calculate the address used in a compressed load/store instruction. + None if address cannot be resolved. + + TODO: remove this when updating to Capstone 6 + """ + _, disp, reg = instruction.operands + + if disp.before_value is None or reg.before_value is None: + return None + + return disp.before_value + reg.before_value + def _is_condition_taken( self, instruction: PwndbgInstruction, emu: Emulator | None ) -> InstructionCondition: @@ -102,6 +189,18 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): return super()._resolve_target(instruction, emu, call) + @override + def _parse_memory( + self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator + ) -> int | None: + """ + Parse the `RISCVOpMem` Capstone object to determine the concrete memory address used. 
+ """ + base = self._read_register(instruction, op.mem.base, emu) + if base is None: + return None + return base + op.mem.disp + assistant_rv32 = DisassemblyAssistant("rv32") assistant_rv64 = DisassemblyAssistant("rv64") diff --git a/pwndbg/gdblib/disasm/sparc.py b/pwndbg/gdblib/disasm/sparc.py index e69de29bb..85e07081a 100644 --- a/pwndbg/gdblib/disasm/sparc.py +++ b/pwndbg/gdblib/disasm/sparc.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from capstone.sparc import * # noqa: F403 + +# Instruction groups for future use +SPARC_LOAD_INSTRUCTIONS = { + SPARC_INS_LDUB: 1, + SPARC_INS_LDSB: 1, + SPARC_INS_LDUH: 2, + SPARC_INS_LDSH: 2, + SPARC_INS_LD: 4, + SPARC_INS_LDD: 8, +} + +SPARC_STORE_INSTRUCTIONS = { + SPARC_INS_STB: 1, + SPARC_INS_STH: 2, + SPARC_INS_ST: 4, + SPARC_INS_STD: 8, +} diff --git a/pwndbg/gdblib/disasm/x86.py b/pwndbg/gdblib/disasm/x86.py index 09fbca8bb..ef65f3b41 100644 --- a/pwndbg/gdblib/disasm/x86.py +++ b/pwndbg/gdblib/disasm/x86.py @@ -79,8 +79,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): def handle_mov(self, instruction: PwndbgInstruction, emu: Emulator) -> None: left, right = instruction.operands - # Read from right operand - if right.before_value is not None: + # If this is a LOAD operation - MOV REG, [MEM] + if left.type == CS_OP_REG and right.type == CS_OP_MEM: + self._common_load_annotator( + instruction, + emu, + right.before_value, + right.cs_op.size, + False, + right.cs_op.size, + left.str, + right.str, + ) + # Handle other cases of MOV + elif right.before_value is not None: TELESCOPE_DEPTH = max(0, int(pwndbg.config.disasm_telescope_depth)) # +1 to ensure we telescope enough to read at least one address for the last "elif" below @@ -88,7 +100,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): right.before_value, TELESCOPE_DEPTH + 1, instruction, - right, emu, read_size=right.cs_op.size, ) @@ -111,36 +122,6 @@ class 
DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): elif left.type == CS_OP_REG and right.type in (CS_OP_REG, CS_OP_IMM): instruction.annotation = f"{left.str} => {super()._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)}" - # MOV REG, [MEM] - elif left.type == CS_OP_REG and right.type == CS_OP_MEM: - # There are many cases we need to consider if there is a mov from a dereference memory location into a register - # Were we able to reason about the memory address, and dereference it? - # Does the resolved memory address actual point into memory? - - # right.before_value should be a pointer in this context. If we telescoped and still returned just the value itself, - # it indicates that the dereference likely segfaults - - if not pwndbg.gdblib.memory.peek(right.before_value): - telescope_print = MessageColor.error( - f"" - ) - elif len(telescope_addresses) == 1: - # If only one address, and we didn't telescope, it means we couldn't reason about the dereferenced memory - # Simply display the address - - # As an example, this path is taken for the following case: - # mov rdi, qword ptr [rip + 0x17d40] where the resolved memory address is in writeable memory, - # and we are not emulating. 
This means we cannot savely dereference (if PC is not at the current instruction address) - telescope_print = None - else: - # Start showing at dereferenced by, hence the [1:] - telescope_print = f"{super()._telescope_format_list(telescope_addresses[1:], TELESCOPE_DEPTH, emu)}" - - if telescope_print is not None: - instruction.annotation = f"{left.str}, {right.str} => {telescope_print}" - else: - instruction.annotation = f"{left.str}, {right.str}" - def handle_vmovaps(self, instruction: PwndbgInstruction, emu: Emulator) -> None: # If the source or destination is in memory, it must be aligned to: # 16 bytes for SSE, 32 bytes for AVX, 64 bytes for AVX-512 @@ -172,7 +153,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): if right.before_value is not None: telescope_addresses = super()._telescope( - right.before_value, TELESCOPE_DEPTH, instruction, right, emu + right.before_value, TELESCOPE_DEPTH, instruction, emu ) instruction.annotation = f"{left.str} => {super()._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)}" @@ -281,7 +262,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant): return None if operand.type == CS_OP_MEM: - return self._read_memory(value, operand.cs_op.size, instruction, operand, emu) + return self._read_memory(value, operand.cs_op.size, instruction, emu) else: return super()._resolve_used_value(value, instruction, operand, emu)