From f86570ae44a391d409a93cd722751b22d676714f Mon Sep 17 00:00:00 2001 From: OBarronCS <55004530+OBarronCS@users.noreply.github.com> Date: Thu, 17 Apr 2025 04:42:34 -0700 Subject: [PATCH] Small Refactor to DisassemblyAssistant (#2879) * Refactor DisassemblyAssistant so they can be used to share/track information between instructions being enhanced in the same pass * Fix incorrect change in breakonconditionalbranch + add comment * rebase * Move disasm/__init__.py -> ../disassembly.py in order to avoid a circular import. Create central list of all DisassemblyAssistant's * move disassembly.py file back to disasm folder --- pwndbg/aglib/__init__.py | 8 - pwndbg/aglib/disasm/__init__.py | 508 ------------------ pwndbg/aglib/disasm/aarch64.py | 7 +- pwndbg/aglib/disasm/arch.py | 43 +- pwndbg/aglib/disasm/arm.py | 9 +- pwndbg/aglib/disasm/disassembly.py | 566 ++++++++++++++++++++ pwndbg/aglib/disasm/mips.py | 7 +- pwndbg/aglib/disasm/riscv.py | 6 +- pwndbg/aglib/disasm/x86.py | 8 +- pwndbg/aglib/nearpc.py | 4 +- pwndbg/aglib/next.py | 20 +- pwndbg/aglib/tls.py | 2 +- pwndbg/arguments.py | 2 +- pwndbg/commands/branch.py | 14 +- pwndbg/commands/context.py | 8 +- pwndbg/commands/dev.py | 8 +- pwndbg/commands/dumpargs.py | 4 +- pwndbg/commands/rop.py | 2 +- pwndbg/commands/search.py | 2 +- pwndbg/emu/emulator.py | 8 +- pwndbg/enhance.py | 4 +- tests/qemu-tests/tests/user/test_aarch64.py | 25 +- 22 files changed, 646 insertions(+), 619 deletions(-) create mode 100644 pwndbg/aglib/disasm/disassembly.py diff --git a/pwndbg/aglib/__init__.py b/pwndbg/aglib/__init__.py index 1646c4fc3..08f0fe375 100644 --- a/pwndbg/aglib/__init__.py +++ b/pwndbg/aglib/__init__.py @@ -9,14 +9,6 @@ regs = None def load_aglib(): import pwndbg.aglib.argv import pwndbg.aglib.ctypes - import pwndbg.aglib.disasm - import pwndbg.aglib.disasm.aarch64 - import pwndbg.aglib.disasm.arm - import pwndbg.aglib.disasm.mips - import pwndbg.aglib.disasm.ppc - import pwndbg.aglib.disasm.riscv - import 
pwndbg.aglib.disasm.sparc - import pwndbg.aglib.disasm.x86 import pwndbg.aglib.dynamic import pwndbg.aglib.elf import pwndbg.aglib.file diff --git a/pwndbg/aglib/disasm/__init__.py b/pwndbg/aglib/disasm/__init__.py index 0aeeae518..e69de29bb 100644 --- a/pwndbg/aglib/disasm/__init__.py +++ b/pwndbg/aglib/disasm/__init__.py @@ -1,508 +0,0 @@ -""" -Functionality for disassmebling code at an address, or at an -address +/- a few instructions. -""" - -from __future__ import annotations - -import collections -import re -import sys -from typing import DefaultDict -from typing import List -from typing import Set -from typing import Tuple - -import capstone -from capstone import * # noqa: F403 - -import pwndbg -import pwndbg.aglib.arch -import pwndbg.aglib.disasm.arch -import pwndbg.aglib.memory -import pwndbg.emu.emulator -import pwndbg.lib.cache -from pwndbg.aglib.disasm.arch import DEBUG_ENHANCEMENT -from pwndbg.aglib.disasm.instruction import ManualPwndbgInstruction -from pwndbg.aglib.disasm.instruction import PwndbgInstruction -from pwndbg.aglib.disasm.instruction import PwndbgInstructionImpl -from pwndbg.aglib.disasm.instruction import SplitType -from pwndbg.color import message -from pwndbg.dbg import EventType - -if pwndbg.dbg.is_gdblib_available(): - import gdb - - -CapstoneArch = { - "arm": CS_ARCH_ARM, - "armcm": CS_ARCH_ARM, - "aarch64": CS_ARCH_AARCH64, - "i386": CS_ARCH_X86, - "i8086": CS_ARCH_X86, - "x86-64": CS_ARCH_X86, - "powerpc": CS_ARCH_PPC, - "mips": CS_ARCH_MIPS, - "sparc": CS_ARCH_SPARC, - "rv32": CS_ARCH_RISCV, - "rv64": CS_ARCH_RISCV, - "s390x": CS_ARCH_SYSTEMZ, -} - -CapstoneEndian = { - "little": CS_MODE_LITTLE_ENDIAN, - "big": CS_MODE_BIG_ENDIAN, -} - -CapstoneMode = {4: CS_MODE_32, 8: CS_MODE_64} - -CapstoneSyntax = {"intel": CS_OPT_SYNTAX_INTEL, "att": CS_OPT_SYNTAX_ATT} - -# For variable-instruction-width architectures -# (x86 and amd64), we keep a cache of instruction -# sizes, and where the end of the instruction falls. 
-# -# This allows us to consistently disassemble backward. -VariableInstructionSizeMax = { - "i386": 16, - "x86-64": 16, - "i8086": 16, - "mips": 8, - "rv32": 22, - "rv64": 22, - "s390x": 6, -} - - -# Caching strategy: -# To ensure we don't have stale register/memory information in our cached PwndbgInstruction, -# we clear the cache whenever we DON'T do a `stepi`, `nexti`, `step`, or `next` command. -# Although `stepi` and `nexti` always go to the next machine instruction in memory, `step` and `next` -# can skip over multiple when GDB has debugging symbols and sourcecode -# In order to determine that we did a `stepi`, `nexti`, `step`, or `next`, whenever the process stops, -# we check if the current program counter is at the address of one of the instructions that we -# emulated to the last time the process stopped. This allows use to skips a handful of instruction, but still retain the cache -# Any larger changes of the program counter will cause the cache to reset. - -next_addresses_cache: Set[int] = set() - - -# Register GDB event listeners for all stop events -@pwndbg.dbg.event_handler(EventType.STOP) -def enhance_cache_listener() -> None: - # Clear the register value cache to ensure we get the correct program counter value - pwndbg.aglib.regs.read_reg.cache.clear() # type: ignore[attr-defined] - - if pwndbg.aglib.regs.pc not in next_addresses_cache: - # Clear the enhanced instruction cache to ensure we don't use stale values - computed_instruction_cache.clear() - - -@pwndbg.dbg.event_handler(EventType.MEMORY_CHANGED) -@pwndbg.dbg.event_handler(EventType.REGISTER_CHANGED) -def clear_on_reg_mem_change() -> None: - # We clear all the future computed instructions because when we manually change a register or memory, it's often a location - # used by the instructions at or just after the current PC, and our previously emulated future instructions might be inaccurate - computed_instruction_cache.pop(pwndbg.aglib.regs.pc, None) - - for addr in next_addresses_cache: - 
computed_instruction_cache.pop(addr, None) - - next_addresses_cache.clear() - - -# Dict of Address -> previous Address executed -# Used to display instructions that led to current instruction -backward_cache: DefaultDict[int, int] = collections.defaultdict(lambda: None) - -# This allows use to retain the annotation strings from previous instructions -computed_instruction_cache: DefaultDict[int, PwndbgInstruction] = collections.defaultdict( - lambda: None -) - -# Maps an address to integer 0/1, indicating the Thumb mode bit for the given address. -# Value is None if Thumb bit irrelevent or unknown. -emulated_arm_mode_cache: DefaultDict[int, int] = collections.defaultdict(lambda: None) - - -@pwndbg.lib.cache.cache_until("objfile") -def get_disassembler_cached(arch, ptrsize: int, endian, extra=None): - arch = CapstoneArch[arch] - - if extra is None: - mode = CapstoneMode[ptrsize] - else: - mode = extra - - mode |= CapstoneEndian[endian] - - flavor = pwndbg.dbg.x86_disassembly_flavor() - - cs = Cs(arch, mode) - try: - cs.syntax = CapstoneSyntax[flavor] - except CsError: - pass - cs.detail = True - return cs - - -def get_disassembler(address): - if pwndbg.aglib.arch.name == "armcm": - thumb_mode = emulated_arm_mode_cache[address] - if thumb_mode is None: - thumb_mode = pwndbg.aglib.regs.xpsr & (1 << 24) - # novermin - extra = (CS_MODE_MCLASS | CS_MODE_THUMB) if thumb_mode else CS_MODE_MCLASS - - elif pwndbg.aglib.arch.name in ("arm", "aarch64"): - thumb_mode = emulated_arm_mode_cache[address] - if thumb_mode is None: - thumb_mode = pwndbg.aglib.regs.cpsr & (1 << 5) - extra = CS_MODE_THUMB if thumb_mode else CS_MODE_ARM - - elif pwndbg.aglib.arch.name == "sparc": - if pwndbg.dbg.is_gdblib_available() and "v9" in gdb.newest_frame().architecture().name(): - extra = CS_MODE_V9 - else: - # The ptrsize base modes cause capstone.CsError: Invalid mode (CS_ERR_MODE) - extra = 0 - - elif pwndbg.aglib.arch.name == "i8086": - extra = CS_MODE_16 - - elif ( - pwndbg.aglib.arch.name 
== "mips" - and pwndbg.dbg.is_gdblib_available() - and "isa32r6" in gdb.newest_frame().architecture().name() - ): - extra = CS_MODE_MIPS32R6 - - elif pwndbg.aglib.arch.name == "rv32": - extra = CS_MODE_RISCV32 | CS_MODE_RISCVC # novermin - elif pwndbg.aglib.arch.name == "rv64": - extra = CS_MODE_RISCV64 | CS_MODE_RISCVC # novermin - elif pwndbg.aglib.arch.name == "s390x": - # The ptrsize base modes cause capstone.CsError: Invalid mode (CS_ERR_MODE) - extra = 0 - else: - extra = None - - return get_disassembler_cached( - pwndbg.aglib.arch.name, pwndbg.aglib.arch.ptrsize, pwndbg.aglib.arch.endian, extra - ) - - -def get_one_instruction( - address, - emu: pwndbg.emu.emulator.Emulator = None, - enhance=True, - from_cache=False, - put_cache=False, -) -> PwndbgInstruction: - """ - If passed an emulator, this will pass it to the DisassemblyAssistant which will - single_step the emulator to determine the operand values before and after the instruction executes. - """ - if from_cache: - cached = computed_instruction_cache[address] - if cached is not None: - return cached - - if pwndbg.aglib.arch.name not in CapstoneArch: - return ManualPwndbgInstruction(address) - - md = get_disassembler(address) - size = VariableInstructionSizeMax.get(pwndbg.aglib.arch.name, 4) - data = pwndbg.aglib.memory.read(address, size, partial=True) - for ins in md.disasm(bytes(data), address, 1): - pwn_ins: PwndbgInstruction = PwndbgInstructionImpl(ins) - - if enhance: - pwndbg.aglib.disasm.arch.DisassemblyAssistant.enhance(pwn_ins, emu) - - if put_cache: - computed_instruction_cache[address] = pwn_ins - - return pwn_ins - - # Make linter happy. This shouldn't occur as md.disasm would crash first. 
- return None - - -# Return None on failure to fetch an instruction -def one( - address=None, - emu: pwndbg.emu.emulator.Emulator = None, - enhance=True, - from_cache=False, - put_cache=False, - put_backward_cache=True, -) -> PwndbgInstruction | None: - if address is None: - address = pwndbg.aglib.regs.pc - - if not pwndbg.aglib.memory.peek(address): - return None - - # A for loop in case this returns an empty list - for insn in get(address, 1, emu, enhance=enhance, from_cache=from_cache, put_cache=put_cache): - if put_backward_cache: - backward_cache[insn.next] = insn.address - return insn - - return None - - -# Get one instruction without enhancement -def one_raw(address=None) -> PwndbgInstruction | None: - if address is None: - address = pwndbg.aglib.regs.pc - - if not pwndbg.aglib.memory.peek(address): - return None - - return get_one_instruction(address, enhance=False) - - -def get( - address, - instructions=1, - emu: pwndbg.emu.emulator.Emulator = None, - enhance=True, - from_cache=False, - put_cache=False, -) -> List[PwndbgInstruction]: - address = int(address) - - # Dont disassemble if there's no memory - if not pwndbg.aglib.memory.peek(address): - return [] - - retval: List[PwndbgInstruction] = [] - for _ in range(instructions): - i = get_one_instruction( - address, emu, enhance=enhance, from_cache=from_cache, put_cache=put_cache - ) - if i is None: - break - address = i.next - retval.append(i) - - return retval - - -def can_run_first_emulate() -> bool: - """ - Disable the emulate config variable if we don't have enough memory to use it - See https://github.com/pwndbg/pwndbg/issues/1534 - And https://github.com/unicorn-engine/unicorn/pull/1743 - """ - global first_time_emulate - if not first_time_emulate: - return True - first_time_emulate = False - - try: - from mmap import mmap - - mm = mmap(-1, 1024 * 1024 * 1024) - mm.close() - except OSError: - print( - message.error( - "Disabling the emulation via Unicorn Engine that is used for computing branches" - 
" as there isn't enough memory (1GB) to use it (since mmap(1G, RWX) failed). See also:\n" - "* https://github.com/pwndbg/pwndbg/issues/1534\n" - "* https://github.com/unicorn-engine/unicorn/pull/1743\n" - "Either free your memory or explicitly set `set emulate off` in your Pwndbg config" - ) - ) - pwndbg.config.emulate.value = "off" - return False - - return True - - -first_time_emulate = True - - -def no_emulate_one(): - result = near(pwndbg.aglib.regs.pc, emulate=False, show_prev_insns=False) - if result: - return result[0][0] - return None - - -def emulate_one(): - result = near(pwndbg.aglib.regs.pc, emulate=True, show_prev_insns=False) - if result: - return result[0][0] - return None - - -def one_with_config(): - """ - Returns a single Pwndbg Instruction at the current PC. - - Emulation determined by the `pwndbg.config.emulate` setting. - """ - result = near( - pwndbg.aglib.regs.pc, - emulate=bool(not pwndbg.config.emulate == "off"), - show_prev_insns=False, - ) - if result: - return result[0][0] - return None - - -# Return (list of PwndbgInstructions, index in list where instruction.address = passed in address) -def near( - address, instructions=1, emulate=False, show_prev_insns=True, use_cache=False, linear=False -) -> Tuple[List[PwndbgInstruction], int]: - """ - Disasms instructions near given `address`. Passing `emulate` makes use of - unicorn engine to emulate instructions to predict branches that will be taken. - `show_prev_insns` makes this show previously cached instructions - (this is mostly used by context's disasm display, so user see what was previously) - """ - - pc = pwndbg.aglib.regs.pc - - # Some architecture aren't emulated yet - if not pwndbg.emu or pwndbg.aglib.arch.name not in pwndbg.emu.emulator.arch_to_UC: - emulate = False - - emu: pwndbg.emu.emulator.Emulator = None - - # Emulate if program pc is at the current instruction - can't emulate at arbitrary places, because we need current - # processor state to instantiate the emulator. 
- if address == pc and emulate and (not first_time_emulate or can_run_first_emulate()): - try: - emu = pwndbg.emu.emulator.Emulator() - except pwndbg.dbg_mod.Error as e: - match = re.search(r"Memory at address (\w+) unavailable\.", str(e)) - if match: - return ([], -1) - else: - raise - - # Start at the current instruction using emulation if available. - current = one(address, emu, put_cache=True) - - if DEBUG_ENHANCEMENT: - if emu and not emu.last_step_succeeded: - print("Emulator failed at first step") - - if current is None: - return ([], -1) - - insns: List[PwndbgInstruction] = [] - - # Get previously executed instructions from the cache. - if DEBUG_ENHANCEMENT: - print(f"CACHE START -------------------, {current.address}") - - if show_prev_insns: - cached = backward_cache[current.address] - insn = one(cached, from_cache=use_cache, put_backward_cache=False) if cached else None - while insn is not None and len(insns) < instructions: - if DEBUG_ENHANCEMENT: - print(f"Got instruction from cache, addr={cached:#x}") - if insn.jump_like and insn.split == SplitType.NO_SPLIT and not insn.causes_branch_delay: - insn.split = SplitType.BRANCH_NOT_TAKEN - insns.append(insn) - cached = backward_cache[insn.address] - insn = one(cached, from_cache=use_cache, put_backward_cache=False) if cached else None - insns.reverse() - - index_of_current_instruction = len(insns) - - insns.append(current) - - if DEBUG_ENHANCEMENT: - print("END CACHE -------------------") - - # At this point, we've already added everything *BEFORE* the requested address, - # and the instruction at 'address'. - # Now, continue forwards. 
- - next_addresses_cache.clear() - next_addresses_cache.add(current.target) - - insn = current - total_instructions = 1 + (2 * instructions) - - while insn and len(insns) < total_instructions: - target = insn.next if not linear else insn.address + insn.size - - # Emulation may have failed or been disabled in the last call to one() - if emu: - if not emu.last_step_succeeded or not emu.valid: - emu = None - else: - # Upon execution the previous instruction, the Thumb mode bit may have changed. - # This means we know whether the next instruction executed will be Thumb or not. - # This returns None in the case the Thumb bit is not relevent. - emulated_arm_mode_cache[emu.pc] = emu.read_thumb_bit() - - # Handle visual splits in the disasm view - # We create splits in 3 conditions: - # 1. We know the instruction is "jump_like" - it mutates the PC. We don't necessarily know the target, but know it can have one. - # 2. The instruction has an explicitly resolved target which is not the next instruction in memory - # 3. The instruction repeats (like x86 `REP`) - if insn.jump_like or insn.has_jump_target or insn.next == insn.address: - split_insn = insn - - # If this instruction has a delay slot, disassemble the delay slot instruction - # And append it to the list - if insn.causes_branch_delay: - # The Unicorn emulator forgets branch decisions when stopped inside of a - # delay slot. We disable emulation in this case - if emu: - emu.valid = False - - split_insn = one(insn.address + insn.size, None, put_cache=True) - insns.append(split_insn) - - # Manually make the backtracing cache correct - backward_cache[insn.next] = split_insn.address - backward_cache[split_insn.address + split_insn.size] = split_insn.address - backward_cache[split_insn.address] = insn.address - - # Because the emulator failed, we manually set the address of the next instruction. 
- # This is the address that typing "nexti" in GDB will take us to - target = split_insn.address + split_insn.size - - if not insn.call_like and ( - insn.is_unconditional_jump or insn.is_conditional_jump_taken - ): - target = insn.target - - if not linear and ( - insn.next != insn.address + insn.size or insn.force_unconditional_jump_target - ): - split_insn.split = SplitType.BRANCH_TAKEN - else: - split_insn.split = SplitType.BRANCH_NOT_TAKEN - - # Address to disassemble & emulate - next_addresses_cache.add(target) - - # The emulator is stepped within this call - insn = one(target, emu, put_cache=True) - - if insn: - insns.append(insn) - - # Remove repeated instructions at the end of disassembly. - # Always ensure we display the current and *next* instruction, - # but any repeats after that are removed. - # - # This helps with infinite loops and RET sleds. - - while insns and len(insns) > 2 and insns[-3].address == insns[-2].address == insns[-1].address: - del insns[-1] - - return (insns, index_of_current_instruction) diff --git a/pwndbg/aglib/disasm/aarch64.py b/pwndbg/aglib/disasm/aarch64.py index c86878b25..0f14ac0f7 100644 --- a/pwndbg/aglib/disasm/aarch64.py +++ b/pwndbg/aglib/disasm/aarch64.py @@ -241,8 +241,8 @@ def resolve_condition(condition: int, cpsr: int) -> InstructionCondition: return InstructionCondition.TRUE if condition else InstructionCondition.FALSE -class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): - def __init__(self, architecture: str) -> None: +class AArch64DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): + def __init__(self, architecture) -> None: super().__init__(architecture) self.annotation_handlers: Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = { @@ -543,6 +543,3 @@ class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): target = target & ((1 << target_bit_width) - 1) return target - - -assistant = DisassemblyAssistant("aarch64") diff --git 
a/pwndbg/aglib/disasm/arch.py b/pwndbg/aglib/disasm/arch.py index d18c18fce..ea606fc78 100644 --- a/pwndbg/aglib/disasm/arch.py +++ b/pwndbg/aglib/disasm/arch.py @@ -27,6 +27,7 @@ from pwndbg.aglib.disasm.instruction import FORWARD_JUMP_GROUP from pwndbg.aglib.disasm.instruction import EnhancedOperand from pwndbg.aglib.disasm.instruction import InstructionCondition from pwndbg.aglib.disasm.instruction import PwndbgInstruction +from pwndbg.lib.arch import PWNDBG_SUPPORTED_ARCHITECTURES_TYPE # Emulator currently requires GDB, and we only use it here for type checking. if TYPE_CHECKING: @@ -139,12 +140,10 @@ def memory_or_register_assign(left: str, right: str, mem_assign: bool) -> str: # The enhance function is passed an instance of the Unicorn emulator # and will .single_step() it to determine operand values before and after executing the instruction class DisassemblyAssistant: - # Registry of all instances, {architecture: instance} - assistants: Dict[str, DisassemblyAssistant] = {} + architecture: PWNDBG_SUPPORTED_ARCHITECTURES_TYPE - def __init__(self, architecture: str) -> None: - if architecture is not None: - self.assistants[architecture] = self + def __init__(self, architecture: PWNDBG_SUPPORTED_ARCHITECTURES_TYPE) -> None: + self.architecture = architecture self.op_handlers: Dict[ int, Callable[[PwndbgInstruction, EnhancedOperand, Emulator], int | None] @@ -163,14 +162,13 @@ class DisassemblyAssistant: CS_OP_MEM: self._memory_string, } - @staticmethod - def for_current_arch() -> DisassemblyAssistant: - return DisassemblyAssistant.assistants.get(pwndbg.aglib.arch.name, None) + def enhance(self, instruction: PwndbgInstruction, emu: Emulator = None) -> None: + """ + Enhance the instruction - resolving branch targets, conditionals, and adding annotations - # Mutates the "instruction" object - @staticmethod - def enhance(instruction: PwndbgInstruction, emu: Emulator = None) -> None: - # Assumed that the emulator's pc is at the instruction's address + This is the 
only public method that should be called on this object externally. + """ + # It is assumed that the emulator's pc is at the instruction's address # There are 3 degrees of emulation: # 1. No emulation at all. In this case, the `emu` parameter should be None @@ -210,14 +208,10 @@ class DisassemblyAssistant: ) emu = jump_emu = None - enhancer: DisassemblyAssistant = DisassemblyAssistant.assistants.get( - pwndbg.aglib.arch.name, generic_assistant - ) - - enhancer._prepare(instruction, emu) + self._prepare(instruction, emu) # Don't disable emulation yet, as we can use it to read the syscall register - enhancer._enhance_syscall(instruction, emu) + self._enhance_syscall(instruction, emu) # Disable emulation for instructions we don't want to emulate (CALL, INT, ...) if emu and set(instruction.groups) & DO_NOT_EMULATE: @@ -228,7 +222,7 @@ class DisassemblyAssistant: print("Turned off emulation - not emulating certain type of instruction") # This function will .single_step the emulation - if not enhancer._enhance_operands(instruction, emu, jump_emu): + if not self._enhance_operands(instruction, emu, jump_emu): if jump_emu is not None and DEBUG_ENHANCEMENT: print(f"Emulation failed at {instruction.address=:#x}") emu = None @@ -239,13 +233,13 @@ class DisassemblyAssistant: instruction.emulated = True # Set the .condition field - enhancer._enhance_conditional(instruction, emu) + self._enhance_conditional(instruction, emu) # Set the .target and .next fields - enhancer._enhance_next(instruction, emu, jump_emu) + self._enhance_next(instruction, emu, jump_emu) if bool(pwndbg.config.disasm_annotations): - enhancer._set_annotation_string(instruction, emu) + self._set_annotation_string(instruction, emu) # Disable emulation after CALL instructions. We do it after enhancement, as we can use emulation # to determine the call's target address. 
@@ -258,7 +252,7 @@ class DisassemblyAssistant: print("Turned off emulation for call") if DEBUG_ENHANCEMENT: - print(enhancer.dump(instruction)) + print(self.dump(instruction)) print("Done enhancing") # This is run before enhancement - often used to handle edge case behavior @@ -1052,6 +1046,3 @@ class DisassemblyAssistant: instruction.annotation = memory_or_register_assign( target_operand.str, math_string, memory_assignment ) - - -generic_assistant = DisassemblyAssistant(None) diff --git a/pwndbg/aglib/disasm/arm.py b/pwndbg/aglib/disasm/arm.py index 1ce8ce5f5..80575b198 100644 --- a/pwndbg/aglib/disasm/arm.py +++ b/pwndbg/aglib/disasm/arm.py @@ -124,8 +124,8 @@ ARM_CAN_WRITE_TO_PC_INSTRUCTIONS = { # This class enhances both ARM A-profile and ARM M-profile (Cortex-M) -class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): - def __init__(self, architecture: str, flags_reg: Literal["cpsr", "xpsr"]) -> None: +class ArmDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): + def __init__(self, architecture, flags_reg: Literal["cpsr", "xpsr"]) -> None: super().__init__(architecture) self.flags_reg = flags_reg @@ -385,8 +385,3 @@ class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): ) return target - - -# Register the assistant for both ARM A-profile and ARM M-profile -assistant = DisassemblyAssistant("arm", "cpsr") -assistant = DisassemblyAssistant("armcm", "xpsr") diff --git a/pwndbg/aglib/disasm/disassembly.py b/pwndbg/aglib/disasm/disassembly.py new file mode 100644 index 000000000..b850c71ca --- /dev/null +++ b/pwndbg/aglib/disasm/disassembly.py @@ -0,0 +1,566 @@ +""" +Functionality for disassembling code at an address, or at an +address +/- a few instructions.
+""" + +from __future__ import annotations + +import collections +import re +from typing import Callable +from typing import DefaultDict +from typing import Dict +from typing import List +from typing import Set +from typing import Tuple + +from capstone import * # noqa: F403 + +import pwndbg +import pwndbg.aglib.arch +import pwndbg.aglib.disasm.aarch64 +import pwndbg.aglib.disasm.arm +import pwndbg.aglib.disasm.disassembly +import pwndbg.aglib.disasm.mips +import pwndbg.aglib.disasm.riscv +import pwndbg.aglib.disasm.x86 +import pwndbg.aglib.memory +import pwndbg.emu.emulator +import pwndbg.lib.cache +from pwndbg.aglib.disasm.arch import DEBUG_ENHANCEMENT +from pwndbg.aglib.disasm.arch import DisassemblyAssistant +from pwndbg.aglib.disasm.instruction import ManualPwndbgInstruction +from pwndbg.aglib.disasm.instruction import PwndbgInstruction +from pwndbg.aglib.disasm.instruction import PwndbgInstructionImpl +from pwndbg.aglib.disasm.instruction import SplitType +from pwndbg.color import message +from pwndbg.dbg import EventType +from pwndbg.lib.arch import PWNDBG_SUPPORTED_ARCHITECTURES_TYPE + +if pwndbg.dbg.is_gdblib_available(): + import gdb + + +CapstoneArch = { + "arm": CS_ARCH_ARM, + "armcm": CS_ARCH_ARM, + "aarch64": CS_ARCH_AARCH64, + "i386": CS_ARCH_X86, + "i8086": CS_ARCH_X86, + "x86-64": CS_ARCH_X86, + "powerpc": CS_ARCH_PPC, + "mips": CS_ARCH_MIPS, + "sparc": CS_ARCH_SPARC, + "rv32": CS_ARCH_RISCV, + "rv64": CS_ARCH_RISCV, + "s390x": CS_ARCH_SYSTEMZ, +} + +CapstoneEndian = { + "little": CS_MODE_LITTLE_ENDIAN, + "big": CS_MODE_BIG_ENDIAN, +} + +CapstoneMode = {4: CS_MODE_32, 8: CS_MODE_64} + +CapstoneSyntax = {"intel": CS_OPT_SYNTAX_INTEL, "att": CS_OPT_SYNTAX_ATT} + +# For variable-instruction-width architectures +# (x86 and amd64), we keep a cache of instruction +# sizes, and where the end of the instruction falls. +# +# This allows us to consistently disassemble backward. 
+VariableInstructionSizeMax = { + "i386": 16, + "x86-64": 16, + "i8086": 16, + "mips": 8, + "rv32": 22, + "rv64": 22, + "s390x": 6, +} + + +# Caching strategy: +# To ensure we don't have stale register/memory information in our cached PwndbgInstruction, +# we clear the cache whenever we DON'T do a `stepi`, `nexti`, `step`, or `next` command. +# Although `stepi` and `nexti` always go to the next machine instruction in memory, `step` and `next` +# can skip over multiple instructions when GDB has debugging symbols and source code. +# In order to determine that we did a `stepi`, `nexti`, `step`, or `next`, whenever the process stops, +# we check if the current program counter is at the address of one of the instructions that we +# emulated to the last time the process stopped. This allows us to skip a handful of instructions, but still retain the cache. +# Any larger changes of the program counter will cause the cache to reset. + +next_addresses_cache: Set[int] = set() + + +# Register GDB event listeners for all stop events +@pwndbg.dbg.event_handler(EventType.STOP) +def enhance_cache_listener() -> None: + # Clear the register value cache to ensure we get the correct program counter value + pwndbg.aglib.regs.read_reg.cache.clear() # type: ignore[attr-defined] + + if pwndbg.aglib.regs.pc not in next_addresses_cache: + # Clear the enhanced instruction cache to ensure we don't use stale values + computed_instruction_cache.clear() + + +@pwndbg.dbg.event_handler(EventType.MEMORY_CHANGED) +@pwndbg.dbg.event_handler(EventType.REGISTER_CHANGED) +def clear_on_reg_mem_change() -> None: + # We clear all the future computed instructions because when we manually change a register or memory, it's often a location + # used by the instructions at or just after the current PC, and our previously emulated future instructions might be inaccurate + computed_instruction_cache.pop(pwndbg.aglib.regs.pc, None) + + for addr in next_addresses_cache: + computed_instruction_cache.pop(addr, None) + +
next_addresses_cache.clear() + + +# Dict of Address -> previous Address executed +# Used to display instructions that led to current instruction +backward_cache: DefaultDict[int, int] = collections.defaultdict(lambda: None) + +# This allows us to retain the annotation strings from previous instructions +computed_instruction_cache: DefaultDict[int, PwndbgInstruction] = collections.defaultdict( + lambda: None +) + +# Maps an address to integer 0/1, indicating the Thumb mode bit for the given address. +# Value is None if the Thumb bit is irrelevant or unknown. +emulated_arm_mode_cache: DefaultDict[int, int] = collections.defaultdict(lambda: None) + + +@pwndbg.lib.cache.cache_until("objfile") +def get_disassembler_cached(arch, ptrsize: int, endian, extra=None): + arch = CapstoneArch[arch] + + if extra is None: + mode = CapstoneMode[ptrsize] + else: + mode = extra + + mode |= CapstoneEndian[endian] + + flavor = pwndbg.dbg.x86_disassembly_flavor() + + cs = Cs(arch, mode) + try: + cs.syntax = CapstoneSyntax[flavor] + except CsError: + pass + cs.detail = True + return cs + + +def get_disassembler(address): + if pwndbg.aglib.arch.name == "armcm": + thumb_mode = emulated_arm_mode_cache[address] + if thumb_mode is None: + thumb_mode = pwndbg.aglib.regs.xpsr & (1 << 24) + # novermin + extra = (CS_MODE_MCLASS | CS_MODE_THUMB) if thumb_mode else CS_MODE_MCLASS + + elif pwndbg.aglib.arch.name in ("arm", "aarch64"): + thumb_mode = emulated_arm_mode_cache[address] + if thumb_mode is None: + thumb_mode = pwndbg.aglib.regs.cpsr & (1 << 5) + extra = CS_MODE_THUMB if thumb_mode else CS_MODE_ARM + + elif pwndbg.aglib.arch.name == "sparc": + if pwndbg.dbg.is_gdblib_available() and "v9" in gdb.newest_frame().architecture().name(): + extra = CS_MODE_V9 + else: + # The ptrsize base modes cause capstone.CsError: Invalid mode (CS_ERR_MODE) + extra = 0 + + elif pwndbg.aglib.arch.name == "i8086": + extra = CS_MODE_16 + + elif ( + pwndbg.aglib.arch.name == "mips" + and pwndbg.dbg.is_gdblib_available()
+ and "isa32r6" in gdb.newest_frame().architecture().name() + ): + extra = CS_MODE_MIPS32R6 + + elif pwndbg.aglib.arch.name == "rv32": + extra = CS_MODE_RISCV32 | CS_MODE_RISCVC # novermin + elif pwndbg.aglib.arch.name == "rv64": + extra = CS_MODE_RISCV64 | CS_MODE_RISCVC # novermin + elif pwndbg.aglib.arch.name == "s390x": + # The ptrsize base modes cause capstone.CsError: Invalid mode (CS_ERR_MODE) + extra = 0 + else: + extra = None + + return get_disassembler_cached( + pwndbg.aglib.arch.name, pwndbg.aglib.arch.ptrsize, pwndbg.aglib.arch.endian, extra + ) + + +def get_one_instruction( + address, + emu: pwndbg.emu.emulator.Emulator = None, + enhance=True, + from_cache=False, + put_cache=False, + assistant: DisassemblyAssistant = None, +) -> PwndbgInstruction: + """ + If passed an emulator, this will pass it to the DisassemblyAssistant which will + single_step the emulator to determine the operand values before and after the instruction executes. + """ + if from_cache: + cached = computed_instruction_cache[address] + if cached is not None: + return cached + + if pwndbg.aglib.arch.name not in CapstoneArch: + return ManualPwndbgInstruction(address) + + md = get_disassembler(address) + size = VariableInstructionSizeMax.get(pwndbg.aglib.arch.name, 4) + data = pwndbg.aglib.memory.read(address, size, partial=True) + for ins in md.disasm(bytes(data), address, 1): + pwn_ins: PwndbgInstruction = PwndbgInstructionImpl(ins) + + if enhance: + if assistant is None: + assistant = ( + pwndbg.aglib.disasm.disassembly.get_disassembly_assistant_for_current_arch() + ) + assistant.enhance(pwn_ins, emu) + + if put_cache: + computed_instruction_cache[address] = pwn_ins + + return pwn_ins + + # Make linter happy. This shouldn't occur as md.disasm would crash first. 
+ return None + + +# Return None on failure to fetch an instruction +def one( + address=None, + emu: pwndbg.emu.emulator.Emulator = None, + enhance=True, + from_cache=False, + put_cache=False, + put_backward_cache=True, + assistant: DisassemblyAssistant = None, +) -> PwndbgInstruction | None: + if address is None: + address = pwndbg.aglib.regs.pc + + if not pwndbg.aglib.memory.peek(address): + return None + + # A for loop in case this returns an empty list + for insn in get( + address, + 1, + emu, + enhance=enhance, + from_cache=from_cache, + put_cache=put_cache, + assistant=assistant, + ): + if put_backward_cache: + backward_cache[insn.next] = insn.address + return insn + + return None + + +# Get one instruction without enhancement +def one_raw(address=None) -> PwndbgInstruction | None: + if address is None: + address = pwndbg.aglib.regs.pc + + if not pwndbg.aglib.memory.peek(address): + return None + + return get_one_instruction(address, enhance=False) + + +def get( + address, + instructions=1, + emu: pwndbg.emu.emulator.Emulator = None, + enhance=True, + from_cache=False, + put_cache=False, + assistant: DisassemblyAssistant = None, +) -> List[PwndbgInstruction]: + address = int(address) + + # Dont disassemble if there's no memory + if not pwndbg.aglib.memory.peek(address): + return [] + + retval: List[PwndbgInstruction] = [] + for _ in range(instructions): + i = get_one_instruction( + address, + emu, + enhance=enhance, + from_cache=from_cache, + put_cache=put_cache, + assistant=assistant, + ) + if i is None: + break + address = i.next + retval.append(i) + + return retval + + +def can_run_first_emulate() -> bool: + """ + Disable the emulate config variable if we don't have enough memory to use it + See https://github.com/pwndbg/pwndbg/issues/1534 + And https://github.com/unicorn-engine/unicorn/pull/1743 + """ + global first_time_emulate + if not first_time_emulate: + return True + first_time_emulate = False + + try: + from mmap import mmap + + mm = mmap(-1, 1024 
* 1024 * 1024) + mm.close() + except OSError: + print( + message.error( + "Disabling the emulation via Unicorn Engine that is used for computing branches" + " as there isn't enough memory (1GB) to use it (since mmap(1G, RWX) failed). See also:\n" + "* https://github.com/pwndbg/pwndbg/issues/1534\n" + "* https://github.com/unicorn-engine/unicorn/pull/1743\n" + "Either free your memory or explicitly set `set emulate off` in your Pwndbg config" + ) + ) + pwndbg.config.emulate.value = "off" + return False + + return True + + +first_time_emulate = True + + +def no_emulate_one(): + result = near(pwndbg.aglib.regs.pc, emulate=False, show_prev_insns=False) + if result: + return result[0][0] + return None + + +def emulate_one(): + result = near(pwndbg.aglib.regs.pc, emulate=True, show_prev_insns=False) + if result: + return result[0][0] + return None + + +def one_with_config(): + """ + Returns a single Pwndbg Instruction at the current PC. + + Emulation determined by the `pwndbg.config.emulate` setting. + """ + result = near( + pwndbg.aglib.regs.pc, + emulate=bool(not pwndbg.config.emulate == "off"), + show_prev_insns=False, + ) + if result: + return result[0][0] + return None + + +# Return (list of PwndbgInstructions, index in list where instruction.address = passed in address) +def near( + address, instructions=1, emulate=False, show_prev_insns=True, use_cache=False, linear=False +) -> Tuple[List[PwndbgInstruction], int]: + """ + Disasms instructions near given `address`. Passing `emulate` makes use of + unicorn engine to emulate instructions to predict branches that will be taken. 
+ `show_prev_insns` makes this show previously cached instructions + (this is mostly used by context's disasm display, so user see what was previously) + """ + + pc = pwndbg.aglib.regs.pc + + # Some architecture aren't emulated yet + if not pwndbg.emu or pwndbg.aglib.arch.name not in pwndbg.emu.emulator.arch_to_UC: + emulate = False + + emu: pwndbg.emu.emulator.Emulator = None + + # Emulate if program pc is at the current instruction - can't emulate at arbitrary places, because we need current + # processor state to instantiate the emulator. + if address == pc and emulate and (not first_time_emulate or can_run_first_emulate()): + try: + emu = pwndbg.emu.emulator.Emulator() + except pwndbg.dbg_mod.Error as e: + match = re.search(r"Memory at address (\w+) unavailable\.", str(e)) + if match: + return ([], -1) + else: + raise + + # By using the same assistant for all the instructions disassembled in this pass, we can track and share information across the instructions + assistant = pwndbg.aglib.disasm.disassembly.get_disassembly_assistant_for_current_arch() + + # Start at the current instruction using emulation if available. + current = one(address, emu, put_cache=True, assistant=assistant) + + if DEBUG_ENHANCEMENT: + if emu and not emu.last_step_succeeded: + print("Emulator failed at first step") + + if current is None: + return ([], -1) + + insns: List[PwndbgInstruction] = [] + + # Get previously executed instructions from the cache. 
+ if DEBUG_ENHANCEMENT: + print(f"CACHE START -------------------, {current.address}") + + if show_prev_insns: + cached = backward_cache[current.address] + insn = one(cached, from_cache=use_cache, put_backward_cache=False) if cached else None + while insn is not None and len(insns) < instructions: + if DEBUG_ENHANCEMENT: + print(f"Got instruction from cache, addr={cached:#x}") + if insn.jump_like and insn.split == SplitType.NO_SPLIT and not insn.causes_branch_delay: + insn.split = SplitType.BRANCH_NOT_TAKEN + insns.append(insn) + cached = backward_cache[insn.address] + insn = one(cached, from_cache=use_cache, put_backward_cache=False) if cached else None + insns.reverse() + + index_of_current_instruction = len(insns) + + insns.append(current) + + if DEBUG_ENHANCEMENT: + print("END CACHE -------------------") + + # At this point, we've already added everything *BEFORE* the requested address, + # and the instruction at 'address'. + # Now, continue forwards. + + next_addresses_cache.clear() + next_addresses_cache.add(current.target) + + insn = current + total_instructions = 1 + (2 * instructions) + + while insn and len(insns) < total_instructions: + target = insn.next if not linear else insn.address + insn.size + + # Emulation may have failed or been disabled in the last call to one() + if emu: + if not emu.last_step_succeeded or not emu.valid: + emu = None + else: + # Upon execution the previous instruction, the Thumb mode bit may have changed. + # This means we know whether the next instruction executed will be Thumb or not. + # This returns None in the case the Thumb bit is not relevent. + emulated_arm_mode_cache[emu.pc] = emu.read_thumb_bit() + + # Handle visual splits in the disasm view + # We create splits in 3 conditions: + # 1. We know the instruction is "jump_like" - it mutates the PC. We don't necessarily know the target, but know it can have one. + # 2. The instruction has an explicitly resolved target which is not the next instruction in memory + # 3. 
The instruction repeats (like x86 `REP`) + if insn.jump_like or insn.has_jump_target or insn.next == insn.address: + split_insn = insn + + # If this instruction has a delay slot, disassemble the delay slot instruction + # And append it to the list + if insn.causes_branch_delay: + # The Unicorn emulator forgets branch decisions when stopped inside of a + # delay slot. We disable emulation in this case + if emu: + emu.valid = False + + split_insn = one(insn.address + insn.size, None, put_cache=True) + insns.append(split_insn) + + # Manually make the backtracing cache correct + backward_cache[insn.next] = split_insn.address + backward_cache[split_insn.address + split_insn.size] = split_insn.address + backward_cache[split_insn.address] = insn.address + + # Because the emulator failed, we manually set the address of the next instruction. + # This is the address that typing "nexti" in GDB will take us to + target = split_insn.address + split_insn.size + + if not insn.call_like and ( + insn.is_unconditional_jump or insn.is_conditional_jump_taken + ): + target = insn.target + + if not linear and ( + insn.next != insn.address + insn.size or insn.force_unconditional_jump_target + ): + split_insn.split = SplitType.BRANCH_TAKEN + else: + split_insn.split = SplitType.BRANCH_NOT_TAKEN + + # Address to disassemble & emulate + next_addresses_cache.add(target) + + # The emulator is stepped within this call + insn = one(target, emu, put_cache=True, assistant=assistant) + + if insn: + insns.append(insn) + + # Remove repeated instructions at the end of disassembly. + # Always ensure we display the current and *next* instruction, + # but any repeats after that are removed. + # + # This helps with infinite loops and RET sleds. 
+ + while insns and len(insns) > 2 and insns[-3].address == insns[-2].address == insns[-1].address: + del insns[-1] + + return (insns, index_of_current_instruction) + + +ALL_DISASSEMBLY_ASSISTANTS: Dict[ + PWNDBG_SUPPORTED_ARCHITECTURES_TYPE, Callable[[], DisassemblyAssistant] +] = { + "aarch64": lambda: pwndbg.aglib.disasm.aarch64.AArch64DisassemblyAssistant("aarch64"), + "i386": lambda: pwndbg.aglib.disasm.x86.X86DisassemblyAssistant("i386"), + "x86-64": lambda: pwndbg.aglib.disasm.x86.X86DisassemblyAssistant("x86-64"), + "arm": lambda: pwndbg.aglib.disasm.arm.ArmDisassemblyAssistant("arm", "cpsr"), + "armcm": lambda: pwndbg.aglib.disasm.arm.ArmDisassemblyAssistant("armcm", "xpsr"), + "mips": lambda: pwndbg.aglib.disasm.mips.MipsDisassemblyAssistant("mips"), + "rv32": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv32"), + "rv64": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv64"), +} + + +def get_disassembly_assistant_for_current_arch() -> DisassemblyAssistant: + # If a specific subclass has not been created for the given arc, return the generic assistant + return ALL_DISASSEMBLY_ASSISTANTS.get( + pwndbg.aglib.arch.name, lambda: DisassemblyAssistant(None) + )() + + +def arch_has_disassembly_assistant(arch: PWNDBG_SUPPORTED_ARCHITECTURES_TYPE | None = None) -> bool: + if arch is None: + arch = pwndbg.aglib.arch.name + + return arch in ALL_DISASSEMBLY_ASSISTANTS diff --git a/pwndbg/aglib/disasm/mips.py b/pwndbg/aglib/disasm/mips.py index a480f0c4a..8cc512fbe 100644 --- a/pwndbg/aglib/disasm/mips.py +++ b/pwndbg/aglib/disasm/mips.py @@ -192,8 +192,8 @@ MIPS_BINARY_OPERATIONS = { # This class enhances 32-bit, 64-bit, and micro MIPS -class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): - def __init__(self, architecture: str) -> None: +class MipsDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): + def __init__(self, architecture) -> None: super().__init__(architecture) self.annotation_handlers: 
Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = { @@ -303,6 +303,3 @@ class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): if base is None: return None return base + op.mem.disp - - -assistant = DisassemblyAssistant("mips") diff --git a/pwndbg/aglib/disasm/riscv.py b/pwndbg/aglib/disasm/riscv.py index 3558955cf..bc008389a 100644 --- a/pwndbg/aglib/disasm/riscv.py +++ b/pwndbg/aglib/disasm/riscv.py @@ -113,7 +113,7 @@ RISCV_EMULATED_ANNOTATIONS = { } -class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): +class RISCVDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): def __init__(self, architecture) -> None: super().__init__(architecture) self.architecture = architecture @@ -291,7 +291,3 @@ class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): if base is None: return None return base + op.mem.disp - - -assistant_rv32 = DisassemblyAssistant("rv32") -assistant_rv64 = DisassemblyAssistant("rv64") diff --git a/pwndbg/aglib/disasm/x86.py b/pwndbg/aglib/disasm/x86.py index f9ca9b89e..284e75c72 100644 --- a/pwndbg/aglib/disasm/x86.py +++ b/pwndbg/aglib/disasm/x86.py @@ -49,8 +49,8 @@ X86_MATH_INSTRUCTIONS = { # This class handles enhancement for x86 and x86_64. 
This is because Capstone itself # represents both architectures using the same class -class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): - def __init__(self, architecture: str) -> None: +class X86DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): + def __init__(self, architecture) -> None: super().__init__(architecture) self.annotation_handlers: Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = { @@ -437,7 +437,3 @@ class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): sz += f"{abs(disp):#x}" return f"[{sz}]" - - -assistant = DisassemblyAssistant("i386") -assistant = DisassemblyAssistant("x86-64") diff --git a/pwndbg/aglib/nearpc.py b/pwndbg/aglib/nearpc.py index 6f19f82bb..506048aee 100644 --- a/pwndbg/aglib/nearpc.py +++ b/pwndbg/aglib/nearpc.py @@ -5,7 +5,7 @@ from typing import List from capstone import * # noqa: F403 import pwndbg -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.regs import pwndbg.aglib.strings import pwndbg.aglib.symbol @@ -145,7 +145,7 @@ def nearpc( # for line in symtab.linetable(): # pc_to_linenos[line.pc].append(line.line) - instructions, index_of_pc = pwndbg.aglib.disasm.near( + instructions, index_of_pc = pwndbg.aglib.disasm.disassembly.near( pc, lines, emulate=emulate, show_prev_insns=not repeat, use_cache=use_cache, linear=linear ) diff --git a/pwndbg/aglib/next.py b/pwndbg/aglib/next.py index c66a97647..8396cbd85 100644 --- a/pwndbg/aglib/next.py +++ b/pwndbg/aglib/next.py @@ -10,7 +10,7 @@ from itertools import chain import capstone -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.proc import pwndbg.aglib.regs from pwndbg.aglib.disasm.instruction import PwndbgInstruction @@ -30,20 +30,20 @@ def next_int(address=None, honor_current_branch=False): If no interrupt exists or a jump is in the way, return None. 
""" if address is None: - ins = pwndbg.aglib.disasm.one(pwndbg.aglib.regs.pc) + ins = pwndbg.aglib.disasm.disassembly.one(pwndbg.aglib.regs.pc) if not ins: return None if honor_current_branch and ins.jump_like: return None address = ins.next - ins = pwndbg.aglib.disasm.one(address) + ins = pwndbg.aglib.disasm.disassembly.one(address) while ins: if ins.jump_like: return None elif ins.groups & interrupts: return ins - ins = pwndbg.aglib.disasm.one(ins.next) + ins = pwndbg.aglib.disasm.disassembly.one(ins.next) return None @@ -55,18 +55,18 @@ def next_branch(address=None, including_current=False) -> PwndbgInstruction | No If including_current == True, then if the instruction at the address is already a branch, return it. """ if address is None: - ins = pwndbg.aglib.disasm.one(pwndbg.aglib.regs.pc) + ins = pwndbg.aglib.disasm.disassembly.one(pwndbg.aglib.regs.pc) if not ins: return None if including_current and ins.jump_like: return ins address = ins.next - ins = pwndbg.aglib.disasm.one(address) + ins = pwndbg.aglib.disasm.disassembly.one(address) while ins: if ins.jump_like: return ins - ins = pwndbg.aglib.disasm.one(ins.next) + ins = pwndbg.aglib.disasm.disassembly.one(ins.next) return None @@ -79,7 +79,7 @@ def next_matching_until_branch(address=None, mnemonic=None, op_str=None): if address is None: address = pwndbg.aglib.regs.pc - ins = pwndbg.aglib.disasm.one(address) + ins = pwndbg.aglib.disasm.disassembly.one(address) while ins: # Check whether or not the mnemonic matches if it was specified mnemonic_match = ins.mnemonic.casefold() == mnemonic.casefold() if mnemonic else True @@ -109,7 +109,7 @@ def next_matching_until_branch(address=None, mnemonic=None, op_str=None): # not trying to match the branch instruction itself. 
return None - ins = pwndbg.aglib.disasm.one(ins.next) + ins = pwndbg.aglib.disasm.disassembly.one(ins.next) return None @@ -277,7 +277,7 @@ async def break_on_program_code(ec: pwndbg.dbg_mod.ExecutionController) -> bool: async def break_on_next(ec: pwndbg.dbg_mod.ExecutionController, address=None) -> None: address = address or pwndbg.aglib.regs.pc - ins = pwndbg.aglib.disasm.one(address) + ins = pwndbg.aglib.disasm.disassembly.one(address) proc = pwndbg.dbg.selected_inferior() with proc.break_at(BreakpointLocation(ins.address + ins.size), internal=True) as bp: diff --git a/pwndbg/aglib/tls.py b/pwndbg/aglib/tls.py index 028ebb6be..e51a5ad98 100644 --- a/pwndbg/aglib/tls.py +++ b/pwndbg/aglib/tls.py @@ -5,7 +5,7 @@ Getting Thread Local Storage (TLS) information. from __future__ import annotations import pwndbg.aglib.arch -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.memory import pwndbg.aglib.regs import pwndbg.aglib.symbol diff --git a/pwndbg/arguments.py b/pwndbg/arguments.py index d96b984be..d07c39481 100644 --- a/pwndbg/arguments.py +++ b/pwndbg/arguments.py @@ -11,8 +11,8 @@ from typing import Tuple from capstone import CS_GRP_INT import pwndbg.aglib.arch -import pwndbg.aglib.disasm import pwndbg.aglib.disasm.arch +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.file import pwndbg.aglib.memory import pwndbg.aglib.proc diff --git a/pwndbg/commands/branch.py b/pwndbg/commands/branch.py index 0335a83ae..8faacbab5 100644 --- a/pwndbg/commands/branch.py +++ b/pwndbg/commands/branch.py @@ -5,8 +5,7 @@ import argparse import gdb from capstone import CS_GRP_JUMP -import pwndbg.aglib.disasm -import pwndbg.aglib.disasm.arch +import pwndbg.aglib.disasm.disassembly import pwndbg.color.message as message import pwndbg.commands import pwndbg.gdblib.bpoint @@ -25,9 +24,10 @@ class BreakOnConditionalBranch(pwndbg.gdblib.bpoint.Breakpoint): self.taken = taken def should_stop(self): - # Use the assistant to figure out which if 
all the conditions this - # branch requires in order to be taken have been met. - assistant = pwndbg.aglib.disasm.arch.DisassemblyAssistant.for_current_arch() + # We need to re-run the enhancement process, since now the PC == instruction.address, + # where previously it was not. The enhancement process will figure out if all the conditions + # this branch requires in order to be taken have been met. + assistant = pwndbg.aglib.disasm.disassembly.get_disassembly_assistant_for_current_arch() assistant.enhance(self.instruction) condition_met = self.instruction.is_conditional_jump_taken @@ -91,7 +91,7 @@ def install_breakpoint(branch, taken: bool) -> None: return # We should've picked something by now, or errored out. - instruction = pwndbg.aglib.disasm.one(address) + instruction = pwndbg.aglib.disasm.disassembly.one(address) if instruction is None: print(message.error(f"Could not decode instruction at address {address:#x}")) return @@ -104,7 +104,7 @@ def install_breakpoint(branch, taken: bool) -> None: return # Not all architectures have assistants we can use for conditionals. 
- if pwndbg.aglib.disasm.arch.DisassemblyAssistant.for_current_arch() is None: + if not pwndbg.aglib.disasm.disassembly.arch_has_disassembly_assistant(): print( message.error( "The current architecture is not supported for breaking on conditional branches" diff --git a/pwndbg/commands/context.py b/pwndbg/commands/context.py index 293781351..5aff98587 100644 --- a/pwndbg/commands/context.py +++ b/pwndbg/commands/context.py @@ -21,7 +21,7 @@ from typing_extensions import ParamSpec import pwndbg import pwndbg.aglib.arch -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.nearpc import pwndbg.aglib.regs import pwndbg.aglib.symbol @@ -976,10 +976,10 @@ def try_emulate_if_bug_disable(handler: Callable[[], T]) -> T: @serve_context_history def context_disasm(target=sys.stdout, with_banner=True, width=None): flavor = pwndbg.dbg.x86_disassembly_flavor() - syntax = pwndbg.aglib.disasm.CapstoneSyntax[flavor] + syntax = pwndbg.aglib.disasm.disassembly.CapstoneSyntax[flavor] # Get the Capstone object to set disassembly syntax - cs = next(iter(pwndbg.aglib.disasm.get_disassembler_cached.cache.values()), None) + cs = next(iter(pwndbg.aglib.disasm.disassembly.get_disassembler_cached.cache.values()), None) # The `None` case happens when the cache was not filled yet (see e.g. 
#881) if cs is not None and cs.syntax != syntax: @@ -1199,7 +1199,7 @@ def context_backtrace(with_banner=True, target=sys.stdout, width=None): @serve_context_history def context_args(with_banner=True, target=sys.stdout, width=None): - args = pwndbg.arguments.format_args(pwndbg.aglib.disasm.one()) + args = pwndbg.arguments.format_args(pwndbg.aglib.disasm.disassembly.one()) # early exit to skip section if no arg found if not args: diff --git a/pwndbg/commands/dev.py b/pwndbg/commands/dev.py index 0a5e3c526..03c6a117f 100644 --- a/pwndbg/commands/dev.py +++ b/pwndbg/commands/dev.py @@ -3,7 +3,7 @@ from __future__ import annotations import argparse import logging -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.color.message as MessageColor import pwndbg.commands from pwndbg.commands import CommandCategory @@ -43,7 +43,9 @@ parser.add_argument( def dev_dump_instruction(address=None, force_emulate=False, no_emulate=False) -> None: if address is not None: address = int(address) - cached_instruction = pwndbg.aglib.disasm.computed_instruction_cache.get(address, None) + cached_instruction = pwndbg.aglib.disasm.disassembly.computed_instruction_cache.get( + address, None + ) if cached_instruction: print(repr(cached_instruction)) else: @@ -56,7 +58,7 @@ def dev_dump_instruction(address=None, force_emulate=False, no_emulate=False) -> bool(pwndbg.config.emulate == "on") if override_setting is None else override_setting ) - instructions, index_of_pc = pwndbg.aglib.disasm.near( + instructions, index_of_pc = pwndbg.aglib.disasm.disassembly.near( pwndbg.aglib.regs.pc, 1, emulate=use_emulation, show_prev_insns=False, use_cache=False ) diff --git a/pwndbg/commands/dumpargs.py b/pwndbg/commands/dumpargs.py index 9b912b7be..ee4ff8f21 100644 --- a/pwndbg/commands/dumpargs.py +++ b/pwndbg/commands/dumpargs.py @@ -4,7 +4,7 @@ import argparse from typing import List import pwndbg.aglib.arch -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly 
import pwndbg.arguments import pwndbg.chain import pwndbg.commands @@ -38,7 +38,7 @@ def call_args() -> List[str]: """ results: List[str] = [] - for arg, value in pwndbg.arguments.get(pwndbg.aglib.disasm.one()): + for arg, value in pwndbg.arguments.get(pwndbg.aglib.disasm.disassembly.one()): code = arg.type != "char" pretty = ( pwndbg.chain.format(value, code=code) diff --git a/pwndbg/commands/rop.py b/pwndbg/commands/rop.py index 96cfe85f6..10e796598 100644 --- a/pwndbg/commands/rop.py +++ b/pwndbg/commands/rop.py @@ -15,7 +15,7 @@ import pwndbg.aglib.vmmap import pwndbg.color.message as M import pwndbg.commands import pwndbg.lib.memory -from pwndbg.aglib.disasm import get_disassembler +from pwndbg.aglib.disasm.disassembly import get_disassembler from pwndbg.commands import CommandCategory diff --git a/pwndbg/commands/search.py b/pwndbg/commands/search.py index ca3fdd46f..cb0992645 100644 --- a/pwndbg/commands/search.py +++ b/pwndbg/commands/search.py @@ -11,7 +11,7 @@ import pwnlib import pwndbg import pwndbg.aglib.arch -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.vmmap import pwndbg.color.memory as M import pwndbg.commands diff --git a/pwndbg/emu/emulator.py b/pwndbg/emu/emulator.py index 2195252a4..3d2c5d1d1 100644 --- a/pwndbg/emu/emulator.py +++ b/pwndbg/emu/emulator.py @@ -16,7 +16,7 @@ import capstone as C import unicorn as U import pwndbg.aglib.arch -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.memory import pwndbg.aglib.regs import pwndbg.aglib.strings @@ -444,7 +444,7 @@ class Emulator: rwx = exe = False if exe: - pwndbg_instr = pwndbg.aglib.disasm.one_raw(value) + pwndbg_instr = pwndbg.aglib.disasm.disassembly.one_raw(value) if pwndbg_instr: instr = f"{pwndbg_instr.mnemonic} {pwndbg_instr.op_str}" if pwndbg.config.syntax_highlight: @@ -806,7 +806,7 @@ class Emulator: def until_call(self, pc=None): addr, target = self.until_jump(pc) - while target and not 
pwndbg.aglib.disasm.one_raw(addr).call_like: + while target and not pwndbg.aglib.disasm.disassembly.one_raw(addr).call_like: addr, target = self.until_jump(target) return addr, target @@ -844,7 +844,7 @@ class Emulator: pc = pc or self.pc - insn = pwndbg.aglib.disasm.one_raw(pc) + insn = pwndbg.aglib.disasm.disassembly.one_raw(pc) # If we don't know how to disassemble, bail. if insn is None: diff --git a/pwndbg/enhance.py b/pwndbg/enhance.py index 6e6401198..ddc626349 100644 --- a/pwndbg/enhance.py +++ b/pwndbg/enhance.py @@ -14,7 +14,7 @@ from typing import Tuple import pwndbg import pwndbg.aglib.arch -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.memory import pwndbg.aglib.strings import pwndbg.aglib.typeinfo @@ -112,7 +112,7 @@ def enhance( rwx = exe = False if exe: - pwndbg_instr = pwndbg.aglib.disasm.one(value) + pwndbg_instr = pwndbg.aglib.disasm.disassembly.one(value) if pwndbg_instr: # For telescoping, we don't want the extra spaces between the mnemonic and operands # which are baked in during enhancement. This removes those spaces. 
diff --git a/tests/qemu-tests/tests/user/test_aarch64.py b/tests/qemu-tests/tests/user/test_aarch64.py index 58fc2689b..ad8ef53a7 100644 --- a/tests/qemu-tests/tests/user/test_aarch64.py +++ b/tests/qemu-tests/tests/user/test_aarch64.py @@ -4,7 +4,7 @@ import gdb import user from capstone.arm64_const import ARM64_INS_BL -import pwndbg.aglib.disasm +import pwndbg.aglib.disasm.disassembly import pwndbg.aglib.nearpc import pwndbg.aglib.stack import pwndbg.aglib.symbol @@ -40,7 +40,7 @@ def test_aarch64_branch_enhancement(qemu_assembly_run): """ qemu_assembly_run(SIMPLE_FUNCTION, "aarch64") - instruction = pwndbg.aglib.disasm.one_with_config() + instruction = pwndbg.aglib.disasm.disassembly.one_with_config() assert instruction.id == ARM64_INS_BL assert instruction.call_like @@ -79,7 +79,7 @@ def test_aarch64_branch_enhancement(qemu_assembly_run): # Now, ensure the `b` instruction is set correctly. gdb.execute("ni") - instruction = pwndbg.aglib.disasm.one_with_config() + instruction = pwndbg.aglib.disasm.disassembly.one_with_config() assert not instruction.is_conditional_jump assert instruction.is_unconditional_jump @@ -90,7 +90,7 @@ def test_aarch64_syscall_annotation(qemu_assembly_run): """ qemu_assembly_run(AARCH64_GRACEFUL_EXIT, "aarch64") - instructions = pwndbg.aglib.disasm.near( + instructions = pwndbg.aglib.disasm.disassembly.near( address=pwndbg.aglib.regs.pc, instructions=3, emulate=True )[0] future_syscall_ins = instructions[2] @@ -124,7 +124,10 @@ def test_aarch64_syscall_annotation(qemu_assembly_run): gdb.execute("stepuntilasm svc") # Both for emulation and non-emulation, ensure a syscall at current PC gets enriched - instructions = pwndbg.aglib.disasm.emulate_one(), pwndbg.aglib.disasm.no_emulate_one() + instructions = ( + pwndbg.aglib.disasm.disassembly.emulate_one(), + pwndbg.aglib.disasm.disassembly.no_emulate_one(), + ) for i in instructions: assert i.syscall == 93 @@ -207,36 +210,36 @@ def test_aarch64_conditional_jumps(qemu_assembly_run): 
qemu_assembly_run(CONDITIONAL_JUMPS, "aarch64") gdb.execute("stepuntilasm cbz") - ins = pwndbg.aglib.disasm.one_with_config() + ins = pwndbg.aglib.disasm.disassembly.one_with_config() assert ins.condition == InstructionCondition.TRUE gdb.execute("si") - ins = pwndbg.aglib.disasm.one_with_config() + ins = pwndbg.aglib.disasm.disassembly.one_with_config() assert ins.condition == InstructionCondition.TRUE gdb.execute("si") - ins = pwndbg.aglib.disasm.one_with_config() + ins = pwndbg.aglib.disasm.disassembly.one_with_config() assert ins.condition == InstructionCondition.TRUE gdb.execute("si") - ins = pwndbg.aglib.disasm.one_with_config() + ins = pwndbg.aglib.disasm.disassembly.one_with_config() assert ins.condition == InstructionCondition.TRUE gdb.execute("si") gdb.execute("si") - ins = pwndbg.aglib.disasm.one_with_config() + ins = pwndbg.aglib.disasm.disassembly.one_with_config() assert ins.condition == InstructionCondition.FALSE gdb.execute("si") gdb.execute("si") - ins = pwndbg.aglib.disasm.one_with_config() + ins = pwndbg.aglib.disasm.disassembly.one_with_config() assert ins.condition == InstructionCondition.TRUE