You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pwndbg/pwndbg/aglib/disasm/arch.py

1053 lines
42 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING
from typing import Callable
from typing import Dict
from typing import List
from typing import Tuple
from capstone import * # noqa: F403
from pwnlib.constants import linux
import pwndbg.aglib.arch
import pwndbg.aglib.memory
import pwndbg.aglib.regs
import pwndbg.aglib.remote
import pwndbg.aglib.typeinfo
import pwndbg.aglib.vmmap
import pwndbg.chain
import pwndbg.color.context as C
import pwndbg.color.memory as MemoryColor
import pwndbg.color.message as MessageColor
import pwndbg.color.syntax_highlight as H
import pwndbg.enhance
import pwndbg.lib.config
import pwndbg.lib.disasm.helpers as bit_math
from pwndbg.aglib.disasm.instruction import FORWARD_JUMP_GROUP
from pwndbg.aglib.disasm.instruction import EnhancedOperand
from pwndbg.aglib.disasm.instruction import InstructionCondition
from pwndbg.aglib.disasm.instruction import PwndbgInstruction
# Emulator currently requires GDB, and we only use it here for type checking.
if TYPE_CHECKING:
from pwndbg.emu.emulator import Emulator
# Configuration parameters controlling emulation and disassembly annotations.
# They are read further down in this module through `pwndbg.config.<name>`, with dashes
# in the parameter name replaced by underscores (e.g. `pwndbg.config.emulate_annotations`).
pwndbg.config.add_param(
    "emulate",
    "on",
    """
Unicorn emulation of code from the current PC register
""",
    help_docstring="""\
emulate can be:
off - no emulation is performed
jumps-only - emulation is done only to resolve branch instructions
on - emulation is done to resolve registers/memory values etc.
Emulation can slow down Pwndbg. Disabling it may improve performance.
Emulation requires >1GB RAM being available on the system and ability to allocate RWX memory.
""",
    param_class=pwndbg.lib.config.PARAM_ENUM,
    enum_sequence=["on", "off", "jumps-only"],
)
# Even if this is disabled, branch instructions will still have targets printed
pwndbg.config.add_param(
    "disasm-annotations",
    True,
    """
Display annotations for instructions to provide context on operands and results
""",
)
# When this is false, `DisassemblyAssistant.enhance` does not use the emulator for annotations
pwndbg.config.add_param(
    "emulate-annotations",
    True,
    """
Unicorn emulation for register and memory value annotations on instructions
""",
)
# If this is false, emulation is only used for the current instruction (if emulate-annotations is enabled)
pwndbg.config.add_param(
    "emulate-future-annotations",
    True,
    """
Unicorn emulation to annotate instructions after the current program counter
""",
)
# Affects future instructions, as past ones have already been cached and reflect the process state at the time
pwndbg.config.add_param("disasm-telescope-depth", 3, "Depth of telescope for disasm annotations")
# In disasm view, long telescoped strings might cause line wraps
pwndbg.config.add_param(
    "disasm-telescope-string-length",
    50,
    "Number of characters in strings to display in disasm annotations",
)
# Read in `_enhance_operands` to decide whether immediates are replaced by their symbol
pwndbg.config.add_param(
    "disasm-inline-symbols",
    True,
    "Enable replacing constant operands with their symbol in the disassembly",
)
def syntax_highlight(ins):
    """
    Return `ins` syntax-highlighted as assembly.

    Thin wrapper over `H.syntax_highlight` that always passes the `.asm` pseudo-filename.
    """
    highlighted = H.syntax_highlight(ins, filename=".asm")
    return highlighted
# Set to True to print tracing output while instructions are being enhanced
DEBUG_ENHANCEMENT = False
# DEBUG_ENHANCEMENT = True


def _names_by_value(prefix):
    """
    Build a {value: name} table from every module-level global whose name starts with `prefix`.
    """
    return {value: name for name, value in globals().items() if name.startswith(prefix)}


# Reverse lookup tables (constant value -> constant name) for the star-imported Capstone constants
groups = _names_by_value("CS_GRP_")
ops = _names_by_value("CS_OP_")
access = _names_by_value("CS_AC_")

# Add names for OR-ed combinations of access flags that do not have a name of their own.
# Both loops iterate over snapshots because `access` grows while we iterate; the inner snapshot
# is taken again on every outer iteration, so it also contains combinations added earlier.
for value1, name1 in tuple(access.items()):
    for value2, name2 in tuple(access.items()):
        # novermin
        access.setdefault(value1 | value2, f"{name1} | {name2}")
# Capstone instruction groups that must not be emulated through, either because
# they cannot be emulated without interfering (syscall, etc.) or because they
# change privilege levels.
#
# Calls are not listed here: `DisassemblyAssistant.enhance` checks `instruction.call_like`
# separately and stops emulating after the call has been enhanced.
#
# CS_GRP_PRIVILEGE is deliberately left out as well, since we may be in kernel code,
# and privileged instructions are just fine in that case.
DO_NOT_EMULATE = {CS_GRP_IRET, CS_GRP_INVALID, CS_GRP_INT}
def register_assign(left: str, right: str) -> str:
    """
    Format a register assignment annotation: `left`, the `=>` arrow, then `right`.
    """
    return "{} => {}".format(left, right)
def memory_assign(left: str, right: str) -> str:
    """
    Format a memory assignment annotation: `left`, the `<=` arrow, then `right`.
    """
    return "{} <= {}".format(left, right)
def memory_or_register_assign(left: str, right: str, mem_assign: bool) -> str:
    """
    Format an assignment annotation when the kind of destination is only known at runtime.

    Delegates to `memory_assign` when `mem_assign` is truthy, otherwise to `register_assign`.
    """
    formatter = memory_assign if mem_assign else register_assign
    return formatter(left, right)
# Enhances disassembly with memory values & symbols by adding member variables to an instruction
# The only public method that should be called is "enhance"
# The enhance function is passed an instance of the Unicorn emulator
# and will .single_step() it to determine operand values before and after executing the instruction
class DisassemblyAssistant:
    """
    Architecture-agnostic base for enhancing disassembled instructions.

    `enhance` is the entry point. It selects the assistant registered for the current
    architecture (falling back to the module-level `generic_assistant`) and fills in operand
    values, syscall information, the condition, the next/target addresses and the annotation
    string of a `PwndbgInstruction`.

    The underscore-prefixed methods are hooks and helpers; several of them are meant to be
    overridden by architecture-specific subclasses (`_set_annotation_string`, `_parse_memory`,
    `_condition`, `_resolve_target`, ...).
    """

    # Registry of all instances, {architecture: instance}
    assistants: Dict[str, DisassemblyAssistant] = {}

    def __init__(self, architecture: str | None) -> None:
        """
        Build the operand dispatch tables and register this assistant for `architecture`.

        When `architecture` is None the instance is not added to the registry.
        """
        if architecture is not None:
            self.assistants[architecture] = self

        # Capstone operand type -> function returning the integer value of the operand
        self.op_handlers: Dict[
            int, Callable[[PwndbgInstruction, EnhancedOperand, Emulator], int | None]
        ] = {
            CS_OP_IMM: self._parse_immediate,  # Return immediate value
            CS_OP_REG: self._parse_register,  # Return value of register
            # Handler for memory references (as dictated by Capstone), such as first operand of "mov qword ptr [rbx + rcx*4], rax"
            CS_OP_MEM: self._parse_memory,  # Return parsed address, do not dereference
        }

        # Return a string corresponding to operand. Used to reduce code duplication while printing
        # REG type will return register name, "RAX"
        self.op_names: Dict[int, Callable[[PwndbgInstruction, EnhancedOperand], str | None]] = {
            CS_OP_IMM: self._immediate_string,
            CS_OP_REG: self._register_string,
            CS_OP_MEM: self._memory_string,
        }

    @staticmethod
    def for_current_arch() -> DisassemblyAssistant | None:
        """
        Return the assistant registered for the current architecture, or None if there is none.
        """
        return DisassemblyAssistant.assistants.get(pwndbg.aglib.arch.current, None)

    # Mutates the "instruction" object
    @staticmethod
    def enhance(instruction: PwndbgInstruction, emu: Emulator | None = None) -> None:
        """
        Enhance `instruction` in place, optionally using the emulator `emu`.

        Runs, in order: syscall detection, operand enhancement (which single-steps the
        emulator), condition evaluation, next/target resolution and - when the
        `disasm-annotations` setting is enabled - the annotation string.

        When an instruction must not be emulated, or a call-like instruction has been
        enhanced, the emulator's `valid` attribute is set to False.
        """
        # Assumed that the emulator's pc is at the instruction's address

        # There are 3 degrees of emulation:
        # 1. No emulation at all. In this case, the `emu` parameter should be None
        # 2. Only emulate jumps - the only interaction with the emulator in this case is stepping it and reading the PC
        # 3. Full emulation - read registers and memory from the emulator as well as determining jumps

        if DEBUG_ENHANCEMENT:
            print(
                f"Start enhancing instruction at {hex(instruction.address)} - {instruction.mnemonic} {instruction.op_str}"
            )

        # Get another reference to the emulator for the purposes of jumps
        jump_emu = emu

        if pwndbg.config.emulate != "on":
            emu = None

        # For both cases below, set emu to None so we don't use it for annotation
        if emu and not bool(pwndbg.config.emulate_annotations):
            emu = None

        # Disable emulation for future annotations based on setting
        if (
            emu
            and pwndbg.aglib.regs.pc != instruction.address
            and not bool(pwndbg.config.emulate_future_annotations)
        ):
            emu = None

        # Ensure emulator's program counter is at the correct location.
        # This occurs very rarely - observed sometimes when the remote is stalling, ctrl-c, and for some reason emulator returns PC=0.
        if emu:
            if emu.pc != instruction.address:
                if DEBUG_ENHANCEMENT:
                    print(
                        f"Program counter and emu.pc do not line up: {hex(pwndbg.aglib.regs.pc)=} {hex(emu.pc)=}"
                    )
                emu = jump_emu = None

        enhancer: DisassemblyAssistant = DisassemblyAssistant.assistants.get(
            pwndbg.aglib.arch.current, generic_assistant
        )

        # Don't disable emulation yet, as we can use it to read the syscall register
        enhancer._enhance_syscall(instruction, emu)

        # Disable emulation for instructions we don't want to emulate (CALL, INT, ...)
        if emu and set(instruction.groups) & DO_NOT_EMULATE:
            emu.valid = False
            emu = jump_emu = None

            if DEBUG_ENHANCEMENT:
                print("Turned off emulation - not emulating certain type of instruction")

        # This function will .single_step the emulation
        if not enhancer._enhance_operands(instruction, emu, jump_emu):
            if jump_emu is not None and DEBUG_ENHANCEMENT:
                print(f"Emulation failed at {instruction.address=:#x}")
            emu = None
            jump_emu = None

        if jump_emu is not None:
            # We successfully used emulation for this instruction
            instruction.emulated = True

        # Set the .condition field
        enhancer._enhance_conditional(instruction, emu)

        # Set the .target and .next fields
        enhancer._enhance_next(instruction, emu, jump_emu)

        if bool(pwndbg.config.disasm_annotations):
            enhancer._set_annotation_string(instruction, emu)

        # Disable emulation after CALL instructions. We do it after enhancement, as we can use emulation
        # to determine the call's target address.
        if jump_emu and instruction.call_like:
            jump_emu.valid = False
            jump_emu = None
            emu = None

            if DEBUG_ENHANCEMENT:
                print("Turned off emulation for call")

        if DEBUG_ENHANCEMENT:
            print(enhancer.dump(instruction))
            print("Done enhancing")

    # Subclasses for specific architecture should override this
    def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        """
        The goal of this function is to set the `annotation` field of the instruction,
        which is the string to be printed in a disasm view.

        The base implementation does nothing.
        """
        return None

    def _enhance_operands(
        self, instruction: PwndbgInstruction, emu: Emulator, jump_emu: Emulator
    ) -> bool:
        """
        Enhances the operands by determining values and symbols

        When emulation is enabled, this will `single_step` the emulation to determine the value of registers
        before and after the instruction has executed.

        For each operand explicitly written to or read from (instruction.operands), sets the following fields:

        operand.before_value
            Integer value of the operand before instruction executes.
            None if cannot be resolved/reasoned about.

        operand.after_value
            Integer value of the operand after instruction executes.
            Only set when emulation is enabled. Otherwise None.
            This is relevant if we read and write to the same registers within an instruction

        operand.before_value_resolved / operand.after_value_resolved
            Result of `_resolve_used_value` for the corresponding value.

        operand.symbol:
            Resolved symbol name for this operand, if .before_value is set, else None.

        operand.str:
            String representing the operand

        Return False if emulation fails (so we don't use it in additional enhancement steps)
        """

        # Apply syntax highlighting to the assembly
        if pwndbg.config.syntax_highlight:
            instruction.asm_string = syntax_highlight(instruction.asm_string)

        # Populate the "operands" list of the instruction
        # Set before_value, symbol, and str
        for op in instruction.operands:
            # Retrieve the value, either an immediate, from a register, or from memory
            op.before_value = self.op_handlers.get(op.type, lambda *a: None)(instruction, op, emu)
            if op.before_value is not None:
                # Don't mask immediates - some computations depend on their signed values
                # (operand types are plain ints, so compare by value, not identity)
                if op.type != CS_OP_IMM:
                    op.before_value &= pwndbg.aglib.arch.ptrmask

                op.symbol = MemoryColor.attempt_colorized_symbol(op.before_value)
                op.before_value_resolved = self._resolve_used_value(
                    op.before_value, instruction, op, emu
                )

                if op.symbol and op.type == CS_OP_IMM and pwndbg.config.disasm_inline_symbols:
                    # Make an inline replacement, so `jmp 0x400122` becomes `jmp function_name`
                    instruction.asm_string = instruction.asm_string.replace(
                        hex(op.before_value), op.symbol
                    )

        # Execute the instruction
        if jump_emu and None in jump_emu.single_step():
            # This branch is taken if stepping the emulator failed
            jump_emu = None
            emu = None

        # Set after_value after single stepping the emulator
        if emu is not None:
            # after_value
            for op in instruction.operands:
                # Retrieve the value, either an immediate, from a register, or from memory
                after_value = self.op_handlers.get(op.type, lambda *a: None)(instruction, op, emu)

                # Value handed to _resolve_used_value. Like in the before_value pass above,
                # registers and memory addresses are masked to pointer width *before* they are
                # resolved, while immediates are resolved from their raw (signed) value.
                value_to_resolve = after_value
                if after_value is not None:
                    after_value &= pwndbg.aglib.arch.ptrmask
                    if op.type != CS_OP_IMM:
                        value_to_resolve = after_value

                op.after_value = after_value
                op.after_value_resolved = self._resolve_used_value(
                    value_to_resolve, instruction, op, emu
                )

        # Set .str value of operands, after emulation has been completed
        for op in instruction.operands:
            op.str = self.op_names.get(op.type, lambda *a: None)(instruction, op)

        return jump_emu is not None

    def can_reason_about_process_state(self, instruction: PwndbgInstruction) -> bool:
        """
        Determine if the program counter of the process equals the address of the instruction being enhanced.

        If so, it means we can safely reason and read from registers and memory to enhance values that
        we can add to the annotation string. This becomes relevant when NOT emulating, and is meant to
        allow more details when the PC is at the instruction being enhanced
        """
        return instruction.address == pwndbg.aglib.regs.pc

    # Delegates to "read_register", which takes Capstone ID for register.
    def _parse_register(
        self, instruction: PwndbgInstruction, operand: EnhancedOperand, emu: Emulator
    ) -> int | None:
        """
        Return the value of the register operand, or None if it cannot be reasoned about.
        """
        reg = operand.reg
        return self._read_register(instruction, reg, emu)

    # Determine memory address of operand (Ex: in x86, mov rax, [rip + 0xd55], would return $rip_after_instruction+0xd55)
    # Subclasses override for specific architectures
    def _parse_memory(
        self, instruction: PwndbgInstruction, operand: EnhancedOperand, emu: Emulator
    ) -> int | None:
        """
        Return the address referenced by a memory operand. The base implementation returns None.
        """
        return None

    def _parse_immediate(
        self, instruction: PwndbgInstruction, operand: EnhancedOperand, emu: Emulator
    ) -> int | None:
        """
        Return the immediate value of the operand (`operand.imm`).
        """
        return operand.imm

    def _read_register(
        self, instruction: PwndbgInstruction, operand_id: int, emu: Emulator
    ) -> int | None:
        """
        Read value in register. Return None if cannot reason about the value in the register.
        Different architectures use registers in different patterns, so it is best to
        override this to get to best behavior for a given architecture. See x86.py as example.

        operand_id is the ID internal to Capstone
        """
        regname: str = instruction.cs_insn.reg_name(operand_id)
        return self._read_register_name(instruction, regname, emu)

    # Read register by its name
    def _read_register_name(
        self, instruction: PwndbgInstruction, regname: str, emu: Emulator
    ) -> int | None:
        """
        Read the register called `regname`.

        The emulator is used when one is given; otherwise the live process registers are used,
        but only if the process PC is at `instruction`. Returns None in all other cases.
        """
        if emu:
            # Will read the value of register from the emulator
            # Be conscious about calling this before/after stepping the emulator
            value = emu.read_register(regname)
            if DEBUG_ENHANCEMENT:
                print(f"Register in emulation returned {regname}={hex(value)}")
            return value
        elif self.can_reason_about_process_state(instruction):
            # When instruction address == pc, we can reason about all registers.
            # The values will just reflect values prior to executing the instruction, instead of after,
            # which is relevant if we are writing to this register.
            # However, the information can still be useful for display purposes.
            if DEBUG_ENHANCEMENT:
                print(f"Read value from process register: {pwndbg.aglib.regs[regname]}")
            return pwndbg.aglib.regs[regname]
        else:
            return None

    # Read memory of given size, taking into account emulation and being able to reason about the memory location
    def _read_memory(
        self,
        address: int,
        size: int,
        instruction: PwndbgInstruction,
        emu: Emulator,
    ) -> int | None:
        """
        Read `size` bytes at `address` via a one-level `_telescope`.

        Returns the dereferenced value, or None if the telescope could not dereference the address.
        """
        address_list = self._telescope(address, 1, instruction, emu, read_size=size)

        # Index 0 is the address itself, index 1 (if present) is the value stored there
        if len(address_list) >= 2:
            return address_list[1]

        return None

    # Pass in a operand and its value, and determine the actual value used during an instruction
    # Helpful for cases like `cmp byte ptr [rip + 0x166669], 0`, where first operand could be
    # a register or a memory value to dereference, and we want the actual value used.
    # Override this to implement memory lookups in given architecture (if it's relevant)
    # Different architecture read memory differently:
    # - Only a couple Capstone architectures support the memory .size field, which determines read width.
    # - In others, read/write width is implied.
    def _resolve_used_value(
        self,
        value: int | None,
        instruction: PwndbgInstruction,
        operand: EnhancedOperand,
        emu: Emulator,
    ) -> int | None:
        """
        Return the value actually used by the instruction for `operand`.

        Register and immediate operands resolve to `value` itself. Memory operands resolve to
        the pointer-sized value read from the address `value`. Returns None when `value` is
        None, when the memory cannot be read, or for any other operand type.
        """
        if value is None:
            return None

        if operand.type == CS_OP_REG or operand.type == CS_OP_IMM:
            return value
        elif operand.type == CS_OP_MEM:
            # Assume that we are reading ptrsize - subclasses should override this function
            # to provide a more specific value if needed
            return self._read_memory(value, pwndbg.aglib.arch.ptrsize, instruction, emu)

        return None

    def _telescope(
        self,
        address: int,
        limit: int,
        instruction: PwndbgInstruction,
        emu: Emulator,
        read_size: int | None = None,
    ) -> List[int]:
        """
        Dereference an address recursively - takes into account emulation.

        It will only dereference as it is safe to do so, meaning the last value in the returned list may be a pointer

        The first element of the returned list is `address` itself, so the list is expected to
        have len >= 1.
        """
        can_read_process_state = self.can_reason_about_process_state(instruction)

        if emu:
            return emu.telescope(address, limit, read_size=read_size)
        elif can_read_process_state:
            # Can reason about memory in this case.

            if read_size is not None and read_size < pwndbg.aglib.arch.ptrsize:
                # A narrower-than-pointer read is dereferenced exactly once with the matching type
                result = [address]

                size_type = pwndbg.aglib.typeinfo.get_type(read_size)
                try:
                    read_value = int(
                        pwndbg.aglib.memory.get_typed_pointer_value(size_type, address)
                    )
                    result.append(read_value)
                except pwndbg.dbg_mod.Error:
                    # Best-effort: an unreadable address just yields [address]
                    pass

                return result
            else:
                return pwndbg.chain.get(address, limit=limit)
        else:
            # If the target address is in a non-writeable map, we can pretty safely telescope
            # This is best-effort to give a better experience
            address_list = [address]

            for _ in range(limit):
                # Stop once the same address shows up twice (pointer cycle)
                if address_list.count(address) >= 2:
                    break

                page = pwndbg.aglib.vmmap.find(address)
                if page and not page.write:
                    try:
                        address = int(
                            pwndbg.aglib.memory.get_typed_pointer_value(
                                pwndbg.aglib.typeinfo.ppvoid, address
                            )
                        )
                        address &= pwndbg.aglib.arch.ptrmask
                        address_list.append(address)
                    except pwndbg.dbg_mod.Error:
                        break
                else:
                    # We cannot telescope any further, but we still return what we have
                    break

            return address_list

    # Dispatch to the appropriate format handler. Pass the list returned by `telescope()` to this function
    def _telescope_format_list(self, addresses: List[int], limit: int, emu: Emulator) -> str:
        """
        Format a list returned by `_telescope` into a printable string.

        Uses the emulator's formatter when `emu` is given, `pwndbg.chain.format` otherwise.
        Strings are limited to the `disasm-telescope-string-length` setting.
        """
        # It is assumed proper checks have been made BEFORE calling this function so that pwndbg.chain.format
        # will return values accurate to the program state at the time of instruction executing.

        enhance_string_len = int(pwndbg.config.disasm_telescope_string_length)

        if emu:
            return emu.format_telescope_list(
                addresses, limit, enhance_string_len=enhance_string_len
            )
        else:
            # We can format, but in some cases we may not be able to reason about memory, so don't allow
            # it to dereference to last value in memory (we can't determine what value it is)
            return pwndbg.chain.format(
                addresses,
                limit=limit,
                enhance_string_len=enhance_string_len,
            )

    @staticmethod
    def _syscall_name(number: int, arch: str) -> str | None:
        """
        Given a syscall number and architecture, returns the name of the syscall.
        E.g. execve == 59 on x86-64

        Returns None for an unknown architecture or when no `__NR_*` constant has that number.
        """
        arch_module = {
            "arm": linux.arm,
            "armcm": linux.arm,
            "i386": linux.i386,
            "mips": linux.mips,
            "x86-64": linux.amd64,
            "aarch64": linux.aarch64,
            "rv32": linux.riscv64,
            "rv64": linux.riscv64,
        }.get(arch)

        if arch_module is None:
            return None

        prefix = "__NR_"

        for k, v in arch_module.__dict__.items():
            if v != number:
                continue

            if not k.startswith(prefix):
                continue

            return k[len(prefix) :].lower()

        return None

    def _enhance_syscall(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        """
        Set `instruction.syscall` and `instruction.syscall_name` for interrupt-group instructions.

        Nothing is set when the instruction is not in CS_GRP_INT, when the architecture hook
        reports it is not a syscall, or when the syscall register cannot be read.
        """
        if CS_GRP_INT not in instruction.groups:
            return None

        syscall_arch, syscall_register = self._get_syscall_arch_info(instruction)

        if syscall_arch is None:
            return None

        instruction.syscall = self._read_register_name(instruction, syscall_register, emu)
        if instruction.syscall is not None:
            instruction.syscall_name = (
                DisassemblyAssistant._syscall_name(instruction.syscall, syscall_arch)
                or "<unk_%d>" % instruction.syscall
            )

    def _get_syscall_arch_info(
        self, instruction: PwndbgInstruction
    ) -> Tuple[str | None, str | None]:
        """
        Return tuple of (name of syscall architecture, syscall register name)

        Elements of the tuple will be None to indicate it's not a syscall
        """
        return (pwndbg.aglib.arch.name, pwndbg.lib.abi.ABI.syscall().syscall_register)

    def _enhance_conditional(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        """
        Sets the `condition` of the instruction

        If the instruction is always executed unconditionally, or we cannot reason about the instruction,
        the value of the field is `InstructionCondition.UNDETERMINED`.

        If the instruction is executed conditionally, and we can be absolutely
        sure that it will be executed, the value of the field is `InstructionCondition.TRUE`.

        In all other cases, it is set to `InstructionCondition.FALSE`.
        """
        instruction.condition = self._condition(instruction, emu)

    # Subclasses should override
    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
        """
        Evaluate the condition of `instruction`. The base implementation cannot reason about
        conditions and always returns `InstructionCondition.UNDETERMINED`.
        """
        return InstructionCondition.UNDETERMINED

    def _enhance_next(
        self, instruction: PwndbgInstruction, emu: Emulator, jump_emu: Emulator
    ) -> None:
        """
        Set the `next` and `target` field of the instruction.

        By default, `next` is set to the address of the next linear instruction.

        `next` is the address that the PC would be upon using the GDB `nexti` command,
        `target` is the jump target whether or not the jump is taken, like `stepi` and assuming the jump is taken.

        If the instruction is a non-"call" branch and either:
        - Is unconditional, or
        - Is conditional, and is known to be taken
        And the target can be resolved, `next` is set to the address
        of the jump target.
        """
        next_addr: int | None = None

        # The order for the following statements in determining the next executed instruction is important
        #
        # Firstly, we check the condition field - this field is manually set by our enhancement code
        # There are cases where the Unicorn emulator is incorrect - for example, delay slots in MIPS causing jumps to not resolve correctly
        # due to the way we single-step the emulator. We want our own manual checks to override the emulator
        if not instruction.call_like and (
            instruction.condition == InstructionCondition.TRUE or instruction.is_unconditional_jump
        ):
            # Don't allow call instructions - we want the actual "nexti" address
            # If condition is true, then this might be a conditional jump
            # There are some other instructions that run conditionally though - resolve_target returns None in those cases
            # Or, if this is a unconditional jump, we will try to resolve target
            next_addr = self._resolve_target(instruction, emu)

        # Secondly, attempt to use emulation if we could not resolve the target above, or don't have custom condition handler for the architecture yet
        if next_addr is None and jump_emu:
            # Use emulator to determine the next address:
            # 1. Only use it to determine non-call's (`nexti` should step over calls)
            # 2. Make sure we haven't manually set .condition to False (which should override the emulators prediction)
            if not instruction.call_like and instruction.condition != InstructionCondition.FALSE:
                next_addr = jump_emu.pc

        # Handle edge case - if the target happens to be the next address in memory and it's a jump, we need this variable
        # so the disasm output is accurate.
        if next_addr is not None and instruction.is_unconditional_jump:
            instruction.force_unconditional_jump_target = True

        # All else fails, take the next instruction in memory
        if next_addr is None:
            next_addr = instruction.address + instruction.size

        # Determine the target of this address.
        # This is the address that the instruction could potentially change the program counter to, meaning that `stepi` would go to the target
        instruction.target = self._resolve_target(instruction, emu)

        instruction.next = next_addr & pwndbg.aglib.arch.ptrmask

        if instruction.target is None:
            instruction.target = instruction.next

        if instruction.has_jump_target:
            # Only bother doing the symbol lookup if this is a jump
            instruction.target_string = MemoryColor.get_address_or_symbol(instruction.target)

        if (
            instruction.operands
            and instruction.operands[0].before_value
            and instruction.operands[0].type == CS_OP_IMM
        ):
            instruction.target_const = True

    # This is the default implementation.
    # Subclasses should override this for more accurate behavior/to catch more cases. See x86.py as example
    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None) -> int | None:
        """
        Architecture-specific hook point for _enhance_next.

        Returns the program counter target of this instruction.

        Even in the case of conditional jumps, the potential target should be resolved.

        Returns None if the instruction is not in FORWARD_JUMP_GROUP or no target could be
        determined. For multi-operand jumps this may also set `instruction.target_const`.
        """

        # The FORWARD_JUMP_GROUP here is very specific
        # We only want this resolver to work for instructions that Capstone
        # explicitly labels as jump instructions. If we determine that another type of instruction
        # can have a target, we resolve it manually, as this manual resolver would return improper values,
        # as it is built on the assumptions of branch instructions across many architectures.
        if not bool(instruction.groups & FORWARD_JUMP_GROUP):
            return None

        addr = None

        # At this point, all operands have been resolved.
        # Assume only single-operand jumps.
        if len(instruction.operands) == 1:
            op = instruction.operands[0]
            addr = self._resolve_used_value(op.before_value, instruction, op, emu)
            if addr:
                addr &= pwndbg.aglib.arch.ptrmask
        else:
            # Some architectures have jumps with multiple operands. In this case, this default implementation
            # does a simple naive check. Iterate all operands, pick the first one that resolves to a symbol or lands in executable memory
            # and use that as the target

            # Reversed order, just because through observation the immediates and labels are often farther right
            for op in reversed(instruction.operands):
                resolved_addr = self._resolve_used_value(op.before_value, instruction, op, emu)
                if resolved_addr:
                    resolved_addr &= pwndbg.aglib.arch.ptrmask
                    if op.symbol:
                        addr = resolved_addr
                    else:
                        page = pwndbg.aglib.vmmap.find(resolved_addr)
                        # When debugging a remote QEMU target, the page permissions are not accurate.
                        # In this case, if the candidate address is mapped at all, just go with it.
                        if page and (page.execute or pwndbg.aglib.remote.is_remote()):
                            addr = resolved_addr

                    if addr is not None:
                        instruction.target_const = op.type == CS_OP_IMM
                        break

        if addr is None:
            return None

        return int(addr)

    def dump(self, instruction: PwndbgInstruction) -> str:
        """
        Debug-only method. Returns `repr(instruction)`.
        """
        return repr(instruction)

    # String functions assume the .before_value and .after_value have been set
    def _immediate_string(self, instruction: PwndbgInstruction, operand: EnhancedOperand) -> str:
        """
        Return the immediate as a string: decimal when its magnitude is below 0x10, hex otherwise.
        """
        value = operand.before_value

        if abs(value) < 0x10:
            return "%i" % value

        return "%#x" % value

    def _register_string(self, instruction: PwndbgInstruction, operand: EnhancedOperand) -> str:
        """
        Return colorized register string

        The "changed" color is used unless both the before and after values are known and equal.
        """
        reg = operand.reg
        name = C.register(instruction.cs_insn.reg_name(reg).upper())

        # If using emulation and we determined the value didn't change, don't colorize
        if (
            operand.before_value is not None
            and operand.after_value is not None
            and operand.before_value == operand.after_value
        ):
            return name
        else:
            return C.register_changed(name)

    def _memory_string(
        self, instruction: PwndbgInstruction, operand: EnhancedOperand
    ) -> str | None:
        """
        Example: return "[_IO_2_1_stdin_+16]", where the address/symbol is colorized

        Returns None when the address of the operand is not known.
        """
        if operand.before_value is not None:
            return f"[{MemoryColor.get_address_or_symbol(operand.before_value)}]"
        else:
            return None

    def _common_generic_register_destination(
        self, instruction: PwndbgInstruction, emu: Emulator
    ) -> None:
        """
        This function can be used to annotate instructions that have a register destination.

        In the vast majority of instructions in most architectures, the destination register is the first operand.

        Using emulation, it will determine the value placed into the register, and create an annotation string based on the result.

        The instruction must have at least one operand. Nothing is annotated when the
        destination's `after_value` is unknown.
        """
        left = instruction.operands[0]

        # Emulating determined the value that was set in the destination register
        if left.after_value is not None:
            TELESCOPE_DEPTH = max(0, int(pwndbg.config.disasm_telescope_depth))

            # Telescope the address
            telescope_addresses = self._telescope(
                left.after_value,
                TELESCOPE_DEPTH + 1,
                instruction,
                emu,
                read_size=pwndbg.aglib.arch.ptrsize,
            )

            if not telescope_addresses:
                return

            instruction.annotation = register_assign(
                left.str, self._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)
            )

    def _common_cmp_annotator_builder(
        self, flags_register_name: str, char_to_separate_operands: str = "-"
    ) -> Callable[[PwndbgInstruction, Emulator], None]:
        """
        Many architectures implement near-identical `CMP`-like instructions.
        It takes two values, either subtracts, adds, or does some bit operation
        with them to set values in the flag register.

        To reduce code duplication, subclasses can use this function to create an annotator for CMP-like instructions.

        `flags_register_name`: name of the flags register to read from the emulator
        `char_to_separate_operands`: operator shown between the two compared values

        Returns a handler `(instruction, emu)` that sets `instruction.annotation`.
        """
        FLAG_REG_NAME_DISPLAY = flags_register_name.upper()

        def handler(instruction: PwndbgInstruction, emu: Emulator) -> None:
            # If there are just two operands, we can assume we are comparing them directly, and can display the values.
            # Some architectures have variants with more operands.
            if len(instruction.operands) == 2:
                left, right = instruction.operands

                if (l_value := left.before_value_resolved) is not None and (
                    r_value := right.before_value_resolved
                ) is not None:
                    print_left, print_right = pwndbg.enhance.format_small_int_pair(l_value, r_value)
                    # Ex: "0x7f - 0x12" or "0xdffffdea + 0x8"
                    instruction.annotation = (
                        f"{print_left} {char_to_separate_operands} {print_right}"
                    )

            # Using emulation, we can determine the resulting value put into the flag register
            if emu:
                eflags_bits = pwndbg.aglib.regs.flags[flags_register_name]
                emu_eflags = emu.read_register(flags_register_name)
                eflags_formatted = C.format_flags(emu_eflags, eflags_bits)

                display_result = register_assign(FLAG_REG_NAME_DISPLAY, eflags_formatted)

                if instruction.annotation is None:
                    # First part of this function usually sets .annotation to a string. But if the instruction
                    # has more than two operands, then we don't have a way of showing them, so this avoids the "+=" below
                    instruction.annotation = display_result
                else:
                    instruction.annotation += " " * 5 + display_result

        return handler

    def _common_load_annotator(
        self,
        instruction: PwndbgInstruction,
        emu: Emulator,
        address: int | None,
        read_size: int,
        signed: bool,
        target_size: int,
        dest_str: str,
        source_str: str,
    ) -> None:
        """
        This function annotates load instructions - moving data from memory into a register.

        These instructions read `read_size` bytes from memory into a register.

        `address`: the memory address being read; nothing is annotated when it is None
        `signed`: whether or not we are loading a signed value from memory
        `target_size`: the size of the register in bytes - relevant for sign-extension
        `dest_str`: a string representing the destination register ('rax')
        `source_str`: a string representing the source address ('[0x7fffffffe138]')
        """

        if address is None:
            return

        # There are many cases we need to consider when we are loading a value from memory
        # Were we able to reason about the memory address, and dereference it?
        # Does the resolved memory address actually point into memory?
        # If the target register size is larger than the read size, then do we need sign-extension?

        # If the address is not mapped, we segfaulted
        if not pwndbg.aglib.memory.peek(address):
            instruction.annotation = MessageColor.error(
                f"<Cannot dereference [{MemoryColor.get(address)}]>"
            )
        else:
            # In this branch, it is assumed that the address IS in a mapped page

            TELESCOPE_DEPTH = max(1, int(pwndbg.config.disasm_telescope_depth))

            telescope_addresses = self._telescope(
                address,
                TELESCOPE_DEPTH,
                instruction,
                emu,
                read_size=read_size,
            )

            if len(telescope_addresses) == 1:
                # If telescope returned only 1 address (and we already know the address is in a mapped page)
                # it means we couldn't reason about the dereferenced memory.
                # In this case, simply display the address

                # As an example, this path is taken for the following case:
                # mov rdi, qword ptr [rip + 0x17d40] where the resolved memory address is in writeable memory,
                # and we are not emulating. This means we cannot safely dereference if PC is not at the current instruction address,
                # because the memory address could have been written to by the time the instruction executes
                telescope_print = None
            else:
                if signed and read_size != target_size and len(telescope_addresses) == 2:
                    # We sign extend the value, then convert it back to the unsigned bit representation
                    final_value = bit_math.to_signed(telescope_addresses[1], read_size * 8) & (
                        (1 << (target_size * 8)) - 1
                    )

                    # If it's a signed read that required extension, it will just be a number with no special symbol/color needed
                    telescope_print = hex(final_value)
                else:
                    # Start showing at dereferenced address, hence the [1:]
                    telescope_print = f"{self._telescope_format_list(telescope_addresses[1:], TELESCOPE_DEPTH, emu)}"

            instruction.annotation = f"{dest_str}, {source_str}"

            if telescope_print is not None:
                instruction.annotation = register_assign(instruction.annotation, telescope_print)

    def _common_store_annotator(
        self,
        instruction: PwndbgInstruction,
        emu: Emulator,
        address: int | None,
        value: int | None,
        write_size: int | None,
        address_str: str,
    ) -> None:
        """
        This function annotates store functions - moving data from a register to memory.

        The `value` is truncated to match the `write_size`, if `write_size` is not None.

        The annotation will indicate if the instruction will segfault.

        `address`: the memory address being written; nothing is annotated when it is None
        `value`: the value being stored; nothing is annotated when it is None (and the address is readable)
        `write_size`: number of bytes of `value` that will be written
        `address_str`: a string representing the destination address
        """

        if address is None:
            return

        if not pwndbg.aglib.memory.peek(address):
            instruction.annotation = MessageColor.error(
                f"<Cannot dereference [{MemoryColor.get(address)}]>"
            )
        elif value is not None:
            # To make this annotation work with emulation disabled,
            # we telescope the value that is going to be placed in the memory operand
            TELESCOPE_DEPTH = max(0, int(pwndbg.config.disasm_telescope_depth))

            if write_size is not None:
                value &= (1 << (write_size * 8)) - 1

            telescope_addresses = self._telescope(
                value,
                TELESCOPE_DEPTH,
                instruction,
                emu,
            )

            instruction.annotation = memory_assign(
                address_str, self._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)
            )

    def _common_move_annotator(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        """
        This function handles annotating `MOV` type instructions - where the value of one register is placed into another.

        Only instructions with exactly two operands are annotated.
        """

        if len(instruction.operands) == 2:
            left, right = instruction.operands

            # If we already used emulation, use the result, otherwise take the source operand before_value.
            # Compare against None explicitly: an emulated result of 0 is a valid value and must
            # not fall back to the source operand.
            result = left.after_value if left.after_value is not None else right.before_value

            if result is not None:
                TELESCOPE_DEPTH = max(0, int(pwndbg.config.disasm_telescope_depth))

                telescope_addresses = self._telescope(
                    result,
                    TELESCOPE_DEPTH + 1,
                    instruction,
                    emu,
                )

                if not telescope_addresses:
                    return

                instruction.annotation = register_assign(
                    left.str, self._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)
                )

    def _common_binary_op_annotator(
        self,
        instruction: PwndbgInstruction,
        emu: Emulator,
        target_operand: EnhancedOperand,
        op_one: int | None,
        op_two: int | None,
        char_to_separate_operands: str,
        memory_assignment=False,
    ) -> None:
        """
        Annotate a binary operation (two inputs combined into `target_operand`).

        When the result is known (`target_operand.after_value_resolved`), the annotation shows
        the result, followed by the computation in parentheses if both inputs are known.
        When only the inputs are known, the annotation shows just the computation.
        Nothing is annotated otherwise.

        `op_one`, `op_two`: values of the two inputs, or None if unknown
        `char_to_separate_operands`: operator shown between the two inputs
        `memory_assignment`: use the memory assignment arrow instead of the register one
        """
        # Ex: "0x198723 + 0x2b8"
        math_string = None

        if op_one is not None and op_two is not None:
            print_left, print_right = pwndbg.enhance.format_small_int_pair(op_one, op_two)
            math_string = f"{print_left} {char_to_separate_operands} {print_right}"

        # Using emulation, we can determine the resulting value
        if target_operand.after_value_resolved is not None:
            instruction.annotation = memory_or_register_assign(
                target_operand.str,
                MemoryColor.get_address_and_symbol(target_operand.after_value_resolved),
                memory_assignment,
            )

            if math_string:
                instruction.annotation += f" ({math_string})"
        elif math_string:
            instruction.annotation = memory_or_register_assign(
                target_operand.str, math_string, memory_assignment
            )
# Architecture-agnostic fallback assistant. It is constructed with `None`, so it is not added to
# `DisassemblyAssistant.assistants`; `DisassemblyAssistant.enhance` uses it when no assistant is
# registered for the current architecture.
generic_assistant = DisassemblyAssistant(None)