Annotate all load operations for Arm, AArch64, RISCV, and MIPS (#2309)

* _generate_load_annotator

* Parent class function for load-instruction annotations. RISCV loads implemented and tested. SPARC load/store instructions noted

* Get ARM32 load and store instructions

* Add all AArch64 loads and stores

* MIPS memory address resolver

* AArch64 memory resolver

* AArch64 shift operation in memory operands

* Arm resolve memory operands and PC special case

* Lint

* comment

* rebase and lint

* comment corrections

* Fill in arm function maps

* extract mips load instructions

* lint

* Remove unnecessary parameter to enhancement telescope function

* Implement signed loads

* Now with load code moved to parent, refactor in x86 class

* lint

* aarch64 read size fix

* arm thumb mode pc + 4

* read thumb bit from emu when needed

* lint

* rebase

* lint

* rebase

* Add load annotator to MIPS

* lint

* fix last aarch64 register thing

* minor fixes
pull/2347/head
OBarronCS 1 year ago committed by GitHub
parent 6b85347806
commit 117a68b7f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -20,6 +20,83 @@ from pwndbg.gdblib.disasm.instruction import InstructionCondition
from pwndbg.gdblib.disasm.instruction import PwndbgInstruction
from pwndbg.gdblib.disasm.instruction import boolean_to_instruction_condition
# Maps each single-register AArch64 load instruction id to the number of bytes it reads.
# Negative size indicates signed read
# None indicates the read size depends on the target register
AARCH64_SINGLE_LOAD_INSTRUCTIONS: Dict[int, int | None] = {
    # Load Register (LDR* and the LDUR* variants)
    ARM64_INS_LDRB: 1,
    ARM64_INS_LDURB: 1,
    ARM64_INS_LDRSB: -1,
    ARM64_INS_LDURSB: -1,
    ARM64_INS_LDRH: 2,
    ARM64_INS_LDURH: 2,
    ARM64_INS_LDRSH: -2,
    ARM64_INS_LDURSH: -2,
    ARM64_INS_LDURSW: -4,
    ARM64_INS_LDRSW: -4,
    ARM64_INS_LDUR: None,
    ARM64_INS_LDR: None,
    # Load Register (unprivileged)
    ARM64_INS_LDTRB: 1,
    ARM64_INS_LDTRSB: -1,
    ARM64_INS_LDTRH: 2,
    ARM64_INS_LDTRSH: -2,
    ARM64_INS_LDTRSW: -4,
    ARM64_INS_LDTR: None,
    # Load Exclusive
    ARM64_INS_LDXRB: 1,
    ARM64_INS_LDXRH: 2,
    ARM64_INS_LDXR: None,
    # Load-Acquire
    ARM64_INS_LDARB: 1,
    ARM64_INS_LDARH: 2,
    ARM64_INS_LDAR: None,
}
# Maps each single-register AArch64 store instruction id to a size in bytes.
# NOTE(review): None presumably means "size depends on the register operand", mirroring
# AARCH64_SINGLE_LOAD_INSTRUCTIONS - nothing visible here consumes this table, so confirm before relying on it.
AARCH64_SINGLE_STORE_INSTRUCTIONS: Dict[int, int | None] = {
    # Store Register (STR* and the STUR* variants)
    ARM64_INS_STRB: 1,
    ARM64_INS_STURB: 1,
    ARM64_INS_STRH: 2,
    ARM64_INS_STURH: 2,
    ARM64_INS_STUR: None,
    ARM64_INS_STR: None,
    # Store Register (unprivileged)
    ARM64_INS_STTRB: 1,
    ARM64_INS_STTRH: 2,
    ARM64_INS_STTR: None,
    # Store Exclusive
    ARM64_INS_STXRB: 1,
    ARM64_INS_STXRH: 2,
    ARM64_INS_STXR: None,
    # Store-Release
    ARM64_INS_STLRB: 1,
    ARM64_INS_STLRH: 2,
    ARM64_INS_STLR: None,
    # Store-Release Exclusive
    ARM64_INS_STLXRB: 1,
    ARM64_INS_STLXRH: 2,
    ARM64_INS_STLXR: None,
}
# Maps a Capstone AArch64 shift-type constant to the helper that performs that shift.
# `_parse_memory` looks the shift type of a memory operand up here to shift the index register.
# Parameters to each function: (value, shift_amt, bit_width)
AARCH64_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = {
    ARM64_SFT_LSL: bit_math.logical_shift_left,
    ARM64_SFT_LSR: bit_math.logical_shift_right,
    ARM64_SFT_ASR: bit_math.arithmetic_shift_right,
    ARM64_SFT_ROR: bit_math.rotate_right,
}
# "Extend" operations - https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912
# Each entry takes a number, keeps only its low byte, halfword, word or doubleword,
# and zero-extends (UXT*) or sign-extends (SXT*) that piece.
AARCH64_EXTEND_MAP: Dict[int, Callable[[int], int]] = {
    # Zero-extends: mask off everything above the extracted width
    ARM64_EXT_UXTB: lambda x: x & 0xFF,
    ARM64_EXT_UXTH: lambda x: x & 0xFFFF,
    ARM64_EXT_UXTW: lambda x: x & 0xFFFFFFFF,
    # UXTX has no effect. It extracts 64-bits from a 64-bit register.
    ARM64_EXT_UXTX: lambda x: x,
    # Sign-extends: reinterpret the low bits as a signed quantity
    ARM64_EXT_SXTB: lambda x: bit_math.to_signed(x, 8),
    ARM64_EXT_SXTH: lambda x: bit_math.to_signed(x, 16),
    ARM64_EXT_SXTW: lambda x: bit_math.to_signed(x, 32),
    ARM64_EXT_SXTX: lambda x: bit_math.to_signed(x, 64),
}
def resolve_condition(condition: int, cpsr: int) -> InstructionCondition:
"""
@ -56,29 +133,6 @@ def resolve_condition(condition: int, cpsr: int) -> InstructionCondition:
return InstructionCondition.TRUE if condition else InstructionCondition.FALSE
# Parameters to each function: (value, shift_amt, bit_width)
AARCH64_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = {
ARM64_SFT_LSL: bit_math.logical_shift_left,
ARM64_SFT_LSR: bit_math.logical_shift_right,
ARM64_SFT_ASR: bit_math.arithmetic_shift_right,
ARM64_SFT_ROR: bit_math.rotate_right,
}
# These are "Extend" operations - https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912
# They take in a number, extract a byte, halfword, or word,
# and perform a zero- or sign-extend operation.
AARCH64_EXTEND_MAP: Dict[int, Callable[[int], int]] = {
ARM64_EXT_UXTB: lambda x: x & ((1 << 8) - 1),
ARM64_EXT_UXTH: lambda x: x & ((1 << 16) - 1),
ARM64_EXT_UXTW: lambda x: x & ((1 << 32) - 1),
ARM64_EXT_UXTX: lambda x: x, # UXTX has no effect. It extracts 64-bits from a 64-bit register.
ARM64_EXT_SXTB: lambda x: bit_math.to_signed(x, 8),
ARM64_EXT_SXTH: lambda x: bit_math.to_signed(x, 16),
ARM64_EXT_SXTW: lambda x: bit_math.to_signed(x, 32),
ARM64_EXT_SXTX: lambda x: bit_math.to_signed(x, 64),
}
class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
def __init__(self, architecture: str) -> None:
super().__init__(architecture)
@ -90,8 +144,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
ARM64_INS_ADR: self._common_generic_register_destination,
# ADRP
ARM64_INS_ADRP: self._common_generic_register_destination,
# LDR
ARM64_INS_LDR: self._common_generic_register_destination,
# ADD
ARM64_INS_ADD: self._common_generic_register_destination,
# SUB
@ -108,6 +160,26 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
ARM64_INS_CCMN: self._common_cmp_annotator_builder("cpsr", ""),
}
@override
def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
    """
    Produce the annotation for `instruction`.

    Single-register loads listed in AARCH64_SINGLE_LOAD_INSTRUCTIONS go through the shared
    load annotator; every other instruction is dispatched through `annotation_handlers`.
    """
    if instruction.id not in AARCH64_SINGLE_LOAD_INSTRUCTIONS:
        # Not a known load: run the registered handler, or a no-op when there is none
        handler = self.annotation_handlers.get(instruction.id, lambda *a: None)
        handler(instruction, emu)
        return

    destination = instruction.operands[0]
    # Register width is reported in bits; the load annotator works in bytes
    register_bytes = self._register_width(instruction, destination) // 8

    # A `None` table entry means the load fills the whole destination register
    load_size = AARCH64_SINGLE_LOAD_INSTRUCTIONS[instruction.id] or register_bytes

    source = instruction.operands[1]
    self._common_load_annotator(
        instruction,
        emu,
        source.before_value,
        abs(load_size),
        load_size < 0,  # negative table entries mark signed reads
        register_bytes,
        destination.str,
        source.str,
    )
@override
def _condition(
self, instruction: PwndbgInstruction, emu: Emulator
@ -164,9 +236,58 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return super()._resolve_target(instruction, emu, call)
@override
def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
# Dispatch to the correct handler
self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu)
def _parse_memory(
self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
) -> int | None:
"""
Parse the `Arm64OpMem` Capstone object to determine the concrete memory address used.
Three types of AArch64 memory operands:
1. Register base with optional immediate offset
Examples:
ldrb w3, [x2]
str x1, [x2, #0xb58]
ldr x4,[x3], 4
2. Register + another register with an optional shift
Examples:
ldrb w1, [x9, x2]
str x1, [x2, x0, lsl #3]
3. Register + 32-bit register extended and shifted.
The shift in this case is implicitly a LSL
Examples:
ldr x1, [x2, w22, UXTW #3]
"""
target = 0
# All memory operands have `base` defined
base = self._read_register(instruction, op.mem.base, emu)
if base is None:
return None
target = base + op.mem.disp
# If there is an index register
if op.mem.index != 0:
index = self._read_register(instruction, op.mem.index, emu)
if index is None:
return None
# Optionally apply an extend to the index register
if op.cs_op.ext != 0:
index = AARCH64_EXTEND_MAP[op.cs_op.ext](index)
# Optionally apply shift to the index register
# This handles shifts in the extend operation as well:
# As in the case of `ldr x1, [x2, w22, UXTW #3]`,
# Capstone will automatically make the shift a LSL and set the value to 3
if op.cs_op.shift.type != 0:
# The form of instructions with a shift always apply the shift to a 64-bit value
index = AARCH64_BIT_SHIFT_MAP[op.cs_op.shift.type](index, op.cs_op.shift.value, 64)
target += index
return target
def _register_width(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> int:
return 32 if instruction.cs_insn.reg_name(op.reg)[0] == "w" else 64

@ -10,6 +10,7 @@ from capstone import * # noqa: F403
import pwndbg.chain
import pwndbg.color.context as C
import pwndbg.color.memory as MemoryColor
import pwndbg.color.message as MessageColor
import pwndbg.color.syntax_highlight as H
import pwndbg.enhance
import pwndbg.gdblib.memory
@ -18,6 +19,7 @@ import pwndbg.gdblib.symbol
import pwndbg.gdblib.typeinfo
import pwndbg.gdblib.vmmap
import pwndbg.lib.config
import pwndbg.lib.disasm.helpers as bit_math
from pwndbg.emu.emulator import Emulator
from pwndbg.gdblib.disasm.instruction import FORWARD_JUMP_GROUP
from pwndbg.gdblib.disasm.instruction import EnhancedOperand
@ -394,10 +396,9 @@ class DisassemblyAssistant:
address: int,
size: int,
instruction: PwndbgInstruction,
operand: EnhancedOperand,
emu: Emulator,
) -> int | None:
address_list = self._telescope(address, 1, instruction, operand, emu, read_size=size)
address_list = self._telescope(address, 1, instruction, emu, read_size=size)
if len(address_list) >= 2:
return address_list[1]
@ -426,7 +427,7 @@ class DisassemblyAssistant:
elif operand.type == CS_OP_MEM:
# Assume that we are reading ptrsize - subclasses should override this function
# to provide a more specific value if needed
self._read_memory(value, pwndbg.gdblib.arch.ptrsize, instruction, operand, emu)
self._read_memory(value, pwndbg.gdblib.arch.ptrsize, instruction, emu)
return None
@ -435,7 +436,6 @@ class DisassemblyAssistant:
address: int,
limit: int,
instruction: PwndbgInstruction,
operand: EnhancedOperand,
emu: Emulator,
read_size: int = None,
) -> List[int]:
@ -470,7 +470,7 @@ class DisassemblyAssistant:
else:
return pwndbg.chain.get(address, limit=limit)
elif not can_read_process_state or operand.type == CS_OP_IMM:
else:
# If the target address is in a non-writeable map, we can pretty safely telescope
# This is best-effort to give a better experience
@ -746,7 +746,6 @@ class DisassemblyAssistant:
left.after_value,
TELESCOPE_DEPTH + 1,
instruction,
left,
emu,
read_size=pwndbg.gdblib.arch.ptrsize,
)
@ -801,5 +800,79 @@ class DisassemblyAssistant:
return handler
def _common_load_annotator(
    self,
    instruction: PwndbgInstruction,
    emu: Emulator,
    address: int | None,
    read_size: int,
    signed: bool,
    target_size: int,
    dest_str: str,
    source_str: str,
) -> None:
    """
    Annotate a load instruction - one that moves `read_size` bytes from memory into a register.

    `address`: the resolved memory address being read; nothing is annotated when it is None
    `signed`: whether or not we are loading a signed value from memory
    `target_size`: the size of the register in bytes - relevant for sign-extension
    `dest_str`: a string representing the destination register ('rax')
    `source_str`: a string representing the source address ('[0x7fffffffe138]')
    """
    if address is None:
        return

    # An unmapped address means the load would fault - report that and stop
    if not pwndbg.gdblib.memory.peek(address):
        instruction.annotation = MessageColor.error(
            f"<Cannot dereference [{MemoryColor.get(address)}]>"
        )
        return

    # From here on the address is known to sit in a mapped page
    depth = max(1, int(pwndbg.config.disasm_telescope_depth))
    chain = self._telescope(address, depth, instruction, emu, read_size=read_size)

    annotation = f"{dest_str}, {source_str}"

    # A chain of exactly one entry (with the page known to be mapped) means the memory could
    # not be dereferenced reliably, so only the operands are shown.
    # Example: `mov rdi, qword ptr [rip + 0x17d40]` where the address is in writeable memory
    # and we are not emulating - if PC is not at this instruction yet, the memory may be
    # overwritten before the instruction executes, so reading it now would not be safe.
    if len(chain) != 1:
        if signed and read_size != target_size and len(chain) == 2:
            # Sign-extend the loaded value, then fold it back into the unsigned
            # bit pattern of a `target_size`-byte register
            register_mask = (1 << (target_size * 8)) - 1
            extended = bit_math.to_signed(chain[1], read_size * 8) & register_mask
            # A sign-extended value is shown as a bare number, without symbol or color
            rendered = hex(extended)
        else:
            # Display begins at the dereferenced value, hence the [1:]
            rendered = f"{self._telescope_format_list(chain[1:], depth, emu)}"
        annotation += f" => {rendered}"

    instruction.annotation = annotation
generic_assistant = DisassemblyAssistant(None)

@ -5,6 +5,7 @@ from typing import Dict
from capstone import * # noqa: F403
from capstone.arm import * # noqa: F403
from pwnlib.util.misc import align_down
from typing_extensions import override
import pwndbg.gdblib.arch
@ -13,6 +14,7 @@ import pwndbg.gdblib.memory
import pwndbg.gdblib.regs
import pwndbg.lib.disasm.helpers as bit_math
from pwndbg.emu.emulator import Emulator
from pwndbg.gdblib.arch import read_thumb_bit as process_read_thumb_bit
from pwndbg.gdblib.disasm.instruction import EnhancedOperand
from pwndbg.gdblib.disasm.instruction import InstructionCondition
from pwndbg.gdblib.disasm.instruction import PwndbgInstruction
@ -25,6 +27,34 @@ ARM_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = {
ARM_SFT_ROR: bit_math.rotate_right,
}
# Maps each single-register Arm load instruction id to the number of bytes it reads.
# A negative size marks a signed read: `_set_annotation_string` passes `abs(size)` as the
# read size and `size < 0` as the signed flag.
ARM_SINGLE_LOAD_INSTRUCTIONS: Dict[int, int] = {
    # LDR*
    ARM_INS_LDRB: 1,
    ARM_INS_LDRSB: -1,
    ARM_INS_LDRH: 2,
    ARM_INS_LDRSH: -2,
    ARM_INS_LDR: 4,
    # LDR*T
    ARM_INS_LDRBT: 1,
    ARM_INS_LDRSBT: -1,
    ARM_INS_LDRHT: 2,
    ARM_INS_LDRSHT: -2,
    ARM_INS_LDRT: 4,
    # LDREX*
    ARM_INS_LDREXB: 1,
    ARM_INS_LDREXH: 2,
    ARM_INS_LDREX: 4,
}
# Maps each single-register Arm store instruction id to a size in bytes.
# NOTE(review): nothing visible here consumes this table yet - confirm how it is meant to be used.
ARM_SINGLE_STORE_INSTRUCTIONS: Dict[int, int] = {
    # STR*
    ARM_INS_STRB: 1,
    ARM_INS_STRH: 2,
    ARM_INS_STR: 4,
    # STR*T
    ARM_INS_STRBT: 1,
    ARM_INS_STRHT: 2,
    ARM_INS_STRT: 4,
    # STREX*
    ARM_INS_STREXB: 1,
    ARM_INS_STREXH: 2,
    ARM_INS_STREX: 4,
}
class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
def __init__(self, architecture: str) -> None:
@ -43,7 +73,19 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
@override
def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
    """
    Produce the annotation for `instruction`.

    Loads listed in ARM_SINGLE_LOAD_INSTRUCTIONS go through the shared load annotator with a
    4-byte destination register; anything else is dispatched through `annotation_handlers`.
    """
    load_size = ARM_SINGLE_LOAD_INSTRUCTIONS.get(instruction.id)

    if load_size is None:
        # Not a known load: run the registered handler, or a no-op when there is none
        handler = self.annotation_handlers.get(instruction.id, lambda *a: None)
        handler(instruction, emu)
        return

    destination = instruction.operands[0]
    source = instruction.operands[1]
    self._common_load_annotator(
        instruction,
        emu,
        source.before_value,
        abs(load_size),
        load_size < 0,  # negative table entries mark signed reads
        4,
        destination.str,
        source.str,
    )
@override
@ -99,8 +141,8 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
target = target & ~1
return target
@override
def _memory_string(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> str:
# Currently not used
def _memory_string_old(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> str:
parts = []
if op.mem.base != 0:
@ -116,10 +158,79 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return f"[{(', '.join(parts))}]"
def read_thumb_bit(self, instruction: PwndbgInstruction, emu: Emulator) -> int | None:
    """
    Return the Thumb bit that applies to `instruction`.

    The emulator is asked first when one is supplied. Otherwise the bit is read from the
    process, but only if its state can be reasoned about for this instruction; when
    neither source is usable the result is 0.
    """
    if emu:
        return emu.read_thumb_bit()

    if not self.can_reason_about_process_state(instruction):
        return 0

    # Read the Thumb bit directly from the process flag register
    return process_read_thumb_bit()
@override
def _immediate_string(self, instruction, operand):
    """Return the parent class's rendering of the immediate operand, prefixed with '#'."""
    rendered = super()._immediate_string(instruction, operand)
    return "#" + rendered
@override
def _read_register(
    self, instruction: PwndbgInstruction, operand_id: int, emu: Emulator
) -> int | None:
    """
    Read the value of register `operand_id` as seen by `instruction`.

    Every register except `pc` is delegated to the parent implementation.
    """
    if operand_id != ARM_REG_PC:
        return super()._read_register(instruction, operand_id, emu)

    # When `pc` is referenced in an operand (typically in a memory operand), it reads as
    # the instruction's own address plus 8 - or plus 4 when executing in Thumb mode.
    pc_offset = 4 if self.read_thumb_bit(instruction, emu) else 8
    return instruction.address + pc_offset
@override
def _parse_memory(
    self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
) -> int | None:
    """
    Parse the `ArmOpMem` Capstone object to determine the concrete memory address used.

    Types of memory operands:
        [Rn]
        [Rn, #imm]
        [Rn, Rm]
        [Rn, Rm, <shift> #imm]

    Capstone represents the object a bit differently than AArch64 to align with the underlying architecture of Arm.
    This representation will change in Capstone 6:
        https://github.com/capstone-engine/capstone/issues/2281
        https://github.com/capstone-engine/capstone/pull/1949

    Returns the address wrapped to 32 bits, or None if the base or index register
    value could not be read.
    """
    # All memory operands have `base` defined
    base = self._read_register(instruction, op.mem.base, emu)
    if base is None:
        return None

    if op.mem.base == ARM_REG_PC:
        # The PC as the base register is a special case - it will align the address to a word (32-bit) boundary
        # Explanation: https://stackoverflow.com/a/29588678
        # See "Operation" at the bottom of https://developer.arm.com/documentation/ddi0597/2024-03/Base-Instructions/LDR--literal---Load-Register--literal--
        base = align_down(4, base)

    target = base + op.mem.disp

    # If there is an index register
    if op.mem.index != 0:
        index = self._read_register(instruction, op.mem.index, emu)
        if index is None:
            return None

        # Optionally apply shift to the index register
        if op.cs_op.shift.type != 0:
            index = ARM_BIT_SHIFT_MAP[op.cs_op.shift.type](index, op.cs_op.shift.value, 32)

        # The index is either added to or subtracted from the base
        target += index * (-1 if op.cs_op.subtracted else 1)

    # Address arithmetic wraps at 32 bits. Without this, a subtracted index or a negative
    # displacement can produce a negative number, and an addition can run past 2**32.
    return target & 0xFFFFFFFF
@override
def _parse_register(
self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator

@ -75,23 +75,9 @@ MIPS_SIMPLE_DESTINATION_INSTRUCTIONS = {
MIPS_INS_DCLZ,
MIPS_INS_DSUB,
MIPS_INS_DSUBU,
MIPS_INS_LB,
MIPS_INS_LBU,
MIPS_INS_LD,
MIPS_INS_LDL,
MIPS_INS_LDPC,
MIPS_INS_LDR,
MIPS_INS_LH,
MIPS_INS_LHU,
MIPS_INS_LSA,
MIPS_INS_DLSA,
MIPS_INS_LUI,
MIPS_INS_LW,
MIPS_INS_LWL,
MIPS_INS_LWPC,
MIPS_INS_LWR,
MIPS_INS_LWU,
MIPS_INS_LWUPC,
MIPS_INS_MFHI,
MIPS_INS_MFLO,
MIPS_INS_SEB,
@ -105,6 +91,26 @@ MIPS_SIMPLE_DESTINATION_INSTRUCTIONS = {
MIPS_INS_SLTI,
MIPS_INS_SLTIU,
MIPS_INS_SLTU,
# Rare - unaligned read - have complex loading logic
MIPS_INS_LDL,
MIPS_INS_LDR,
# Rare - partial load on portions of address
MIPS_INS_LWL,
MIPS_INS_LWR,
}
# All MIPS load instructions, mapped to the number of bytes they read.
# A negative size marks a sign-extending load: `_set_annotation_string` passes `abs(size)`
# as the read size and `size < 0` as the signed flag to the common load annotator.
MIPS_LOAD_INSTRUCTIONS = {
    # LB/LH/LW sign-extend the loaded value; the "U" variants zero-extend it
    MIPS_INS_LB: -1,
    MIPS_INS_LBU: 1,
    MIPS_INS_LH: -2,
    MIPS_INS_LHU: 2,
    MIPS_INS_LW: -4,
    MIPS_INS_LWU: 4,
    MIPS_INS_LWPC: -4,
    MIPS_INS_LWUPC: 4,
    # Doubleword loads fill an entire 64-bit register, so there is nothing to extend
    MIPS_INS_LD: 8,
    MIPS_INS_LDPC: 8,
}
@ -115,7 +121,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
@override
def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
if instruction.id in MIPS_SIMPLE_DESTINATION_INSTRUCTIONS:
if instruction.id in MIPS_LOAD_INSTRUCTIONS:
read_size = MIPS_LOAD_INSTRUCTIONS[instruction.id]
self._common_load_annotator(
instruction,
emu,
instruction.operands[1].before_value,
abs(read_size),
read_size < 0,
pwndbg.gdblib.arch.ptrsize,
instruction.operands[0].str,
instruction.operands[1].str,
)
elif instruction.id in MIPS_SIMPLE_DESTINATION_INSTRUCTIONS:
self._common_generic_register_destination(instruction, emu)
@override
@ -152,5 +171,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return super()._resolve_target(instruction, emu, call)
@override
def _parse_memory(
self,
instruction: PwndbgInstruction,
op: pwndbg.gdblib.disasm.arch.EnhancedOperand,
emu: Emulator,
) -> int | None:
"""
Parse the `MipsOpMem` Capstone object to determine the concrete memory address used.
"""
base = self._read_register(instruction, op.mem.base, emu)
if base is None:
return None
return base + op.mem.disp
assistant = DisassemblyAssistant("mips")

@ -4,20 +4,107 @@ from capstone import * # noqa: F403
from capstone.riscv import * # noqa: F403
from typing_extensions import override
import pwndbg.color.memory as MemoryColor
import pwndbg.gdblib.arch
import pwndbg.gdblib.disasm.arch
import pwndbg.gdblib.regs
import pwndbg.lib.disasm.helpers as bit_math
from pwndbg.emu.emulator import Emulator
from pwndbg.gdblib.disasm.instruction import EnhancedOperand
from pwndbg.gdblib.disasm.instruction import InstructionCondition
from pwndbg.gdblib.disasm.instruction import PwndbgInstruction
# Maps each RISC-V load instruction id to the number of bytes it reads.
# A negative size marks a sign-extending load: `_set_annotation_string` passes `abs(size)`
# as the read size and `size < 0` as the signed flag to the common load annotator.
RISCV_LOAD_INSTRUCTIONS = {
    # Sign-extend loads
    RISCV_INS_LB: -1,
    RISCV_INS_LH: -2,
    RISCV_INS_LW: -4,
    # Zero-extend loads
    RISCV_INS_LBU: 1,
    RISCV_INS_LHU: 2,
    RISCV_INS_LWU: 4,
    # NOTE(review): LD reads 8 bytes; presumably no extension applies because it fills a 64-bit register - confirm
    RISCV_INS_LD: 8,
}
# Due to a bug in Capstone, these instructions have incorrect operands to represent a memory address.
# So we temporarily separate them to handle them differently
# This will be fixed in Capstone 6 - https://github.com/capstone-engine/capstone/pull/2393
# Values are the number of bytes read; a negative size marks a sign-extending load.
# NOTE(review): C_LDSP is listed but there is no C_LWSP entry - confirm whether that omission is intentional.
# TODO: remove this when updating to Capstone 6
RISCV_COMPRESSED_LOAD_INSTRUCTIONS = {RISCV_INS_C_LW: -4, RISCV_INS_C_LD: 8, RISCV_INS_C_LDSP: 8}
# Maps each RISC-V store instruction id to a size in bytes.
# NOTE(review): nothing visible here consumes the store tables yet - confirm how they are meant to be used.
RISCV_STORE_INSTRUCTIONS = {
    RISCV_INS_SB: 1,
    RISCV_INS_SH: 2,
    RISCV_INS_SW: 4,
    RISCV_INS_SD: 8,
}
# Compressed store instructions, kept separate like the compressed loads.
# TODO: remove this when updating to Capstone 6
RISCV_COMPRESSED_STORE_INSTRUCTIONS = {
    RISCV_INS_C_SW: 4,
    RISCV_INS_C_SD: 8,
}
class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
def __init__(self, architecture) -> None:
    """Initialise the parent assistant with `architecture` and keep the value on the instance."""
    super().__init__(architecture)
    # Stored so the architecture this assistant was created for is available on the instance
    self.architecture = architecture
@override
def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
    """
    Annotate RISC-V load instructions, then hand over to the parent implementation.

    Regular loads take their address from the memory operand. Compressed loads have their
    address computed manually from the displacement and base-register operands.
    """
    insn_id = instruction.id

    regular_size = RISCV_LOAD_INSTRUCTIONS.get(insn_id)
    if regular_size is not None:
        destination, source = instruction.operands[0], instruction.operands[1]
        self._common_load_annotator(
            instruction,
            emu,
            source.before_value,
            abs(regular_size),
            regular_size < 0,  # negative table entries mark sign-extending loads
            pwndbg.gdblib.arch.ptrsize,
            destination.str,
            source.str,
        )

    # TODO: remove this when updating to Capstone 6
    compressed_size = RISCV_COMPRESSED_LOAD_INSTRUCTIONS.get(insn_id)
    if compressed_size is not None:
        # Capstone doesn't properly represent memory operands for compressed
        # instructions, so the address has to be resolved by hand.
        address = self._resolve_compressed_target_addr(instruction, emu)
        if address is not None:
            source_str = f"[{MemoryColor.get_address_or_symbol(address)}]"
            self._common_load_annotator(
                instruction,
                emu,
                address,
                abs(compressed_size),
                compressed_size < 0,
                pwndbg.gdblib.arch.ptrsize,
                instruction.operands[0].str,
                source_str,
            )

    return super()._set_annotation_string(instruction, emu)
def _resolve_compressed_target_addr(
self, instruction: PwndbgInstruction, emu: Emulator
) -> int | None:
"""
Calculate the address used in a compressed load/store instruction.
None if address cannot be resolved.
TODO: remove this when updating to Capstone 6
"""
_, disp, reg = instruction.operands
if disp.before_value is None or reg.before_value is None:
return None
return disp.before_value + reg.before_value
def _is_condition_taken(
self, instruction: PwndbgInstruction, emu: Emulator | None
) -> InstructionCondition:
@ -102,6 +189,18 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return super()._resolve_target(instruction, emu, call)
@override
def _parse_memory(
self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
) -> int | None:
"""
Parse the `RISCVOpMem` Capstone object to determine the concrete memory address used.
"""
base = self._read_register(instruction, op.mem.base, emu)
if base is None:
return None
return base + op.mem.disp
assistant_rv32 = DisassemblyAssistant("rv32")
assistant_rv64 = DisassemblyAssistant("rv64")

@ -0,0 +1,20 @@
from __future__ import annotations
from capstone.sparc import * # noqa: F403
# Instruction groups for future use
# Values are the access size in bytes; a negative size marks a sign-extending load,
# so the signed loads (LDSB, LDSH) are distinguishable from their unsigned counterparts.
SPARC_LOAD_INSTRUCTIONS = {
    SPARC_INS_LDUB: 1,
    SPARC_INS_LDSB: -1,
    SPARC_INS_LDUH: 2,
    SPARC_INS_LDSH: -2,
    SPARC_INS_LD: 4,
    SPARC_INS_LDD: 8,
}
# Maps each SPARC store instruction id to a size in bytes.
SPARC_STORE_INSTRUCTIONS = {
    SPARC_INS_STB: 1,
    SPARC_INS_STH: 2,
    SPARC_INS_ST: 4,
    SPARC_INS_STD: 8,
}

@ -79,8 +79,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
def handle_mov(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
left, right = instruction.operands
# Read from right operand
if right.before_value is not None:
# If this is a LOAD operation - MOV REG, [MEM]
if left.type == CS_OP_REG and right.type == CS_OP_MEM:
self._common_load_annotator(
instruction,
emu,
right.before_value,
right.cs_op.size,
False,
right.cs_op.size,
left.str,
right.str,
)
# Handle other cases of MOV
elif right.before_value is not None:
TELESCOPE_DEPTH = max(0, int(pwndbg.config.disasm_telescope_depth))
# +1 to ensure we telescope enough to read at least one address for the last "elif" below
@ -88,7 +100,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
right.before_value,
TELESCOPE_DEPTH + 1,
instruction,
right,
emu,
read_size=right.cs_op.size,
)
@ -111,36 +122,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
elif left.type == CS_OP_REG and right.type in (CS_OP_REG, CS_OP_IMM):
instruction.annotation = f"{left.str} => {super()._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)}"
# MOV REG, [MEM]
elif left.type == CS_OP_REG and right.type == CS_OP_MEM:
# There are many cases we need to consider if there is a mov from a dereference memory location into a register
# Were we able to reason about the memory address, and dereference it?
# Does the resolved memory address actual point into memory?
# right.before_value should be a pointer in this context. If we telescoped and still returned just the value itself,
# it indicates that the dereference likely segfaults
if not pwndbg.gdblib.memory.peek(right.before_value):
telescope_print = MessageColor.error(
f"<Cannot dereference [{MemoryColor.get(right.before_value)}]>"
)
elif len(telescope_addresses) == 1:
# If only one address, and we didn't telescope, it means we couldn't reason about the dereferenced memory
# Simply display the address
# As an example, this path is taken for the following case:
# mov rdi, qword ptr [rip + 0x17d40] where the resolved memory address is in writeable memory,
# and we are not emulating. This means we cannot savely dereference (if PC is not at the current instruction address)
telescope_print = None
else:
# Start showing at dereferenced by, hence the [1:]
telescope_print = f"{super()._telescope_format_list(telescope_addresses[1:], TELESCOPE_DEPTH, emu)}"
if telescope_print is not None:
instruction.annotation = f"{left.str}, {right.str} => {telescope_print}"
else:
instruction.annotation = f"{left.str}, {right.str}"
def handle_vmovaps(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
# If the source or destination is in memory, it must be aligned to:
# 16 bytes for SSE, 32 bytes for AVX, 64 bytes for AVX-512
@ -172,7 +153,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
if right.before_value is not None:
telescope_addresses = super()._telescope(
right.before_value, TELESCOPE_DEPTH, instruction, right, emu
right.before_value, TELESCOPE_DEPTH, instruction, emu
)
instruction.annotation = f"{left.str} => {super()._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)}"
@ -281,7 +262,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return None
if operand.type == CS_OP_MEM:
return self._read_memory(value, operand.cs_op.size, instruction, operand, emu)
return self._read_memory(value, operand.cs_op.size, instruction, emu)
else:
return super()._resolve_used_value(value, instruction, operand, emu)

Loading…
Cancel
Save