You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pwndbg/pwndbg/aglib/disasm/aarch64.py

471 lines
16 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING
from typing import Callable
from typing import Dict
from capstone import * # noqa: F403
from capstone.arm64 import * # noqa: F403
from typing_extensions import override
import pwndbg.aglib.arch
import pwndbg.aglib.disasm.arch
import pwndbg.aglib.memory
import pwndbg.aglib.regs
import pwndbg.enhance
import pwndbg.lib.disasm.helpers as bit_math
from pwndbg.aglib.disasm.arch import register_assign
from pwndbg.aglib.disasm.instruction import ALL_JUMP_GROUPS
from pwndbg.aglib.disasm.instruction import EnhancedOperand
from pwndbg.aglib.disasm.instruction import InstructionCondition
from pwndbg.aglib.disasm.instruction import PwndbgInstruction
from pwndbg.aglib.disasm.instruction import boolean_to_instruction_condition
# Emulator currently requires GDB, and we only use it here for type checking.
if TYPE_CHECKING:
from pwndbg.emu.emulator import Emulator
# Maps single-register load instructions to the number of bytes read from memory.
#   - A negative size marks a signed read (the absolute value is the byte count).
#   - None means the read size is determined by the width of the target register.
AARCH64_SINGLE_LOAD_INSTRUCTIONS: Dict[int, int | None] = {
    # Load Register / Load Register (unscaled)
    ARM64_INS_LDRB: 1,
    ARM64_INS_LDURB: 1,
    ARM64_INS_LDRSB: -1,
    ARM64_INS_LDURSB: -1,
    ARM64_INS_LDRH: 2,
    ARM64_INS_LDURH: 2,
    ARM64_INS_LDRSH: -2,
    ARM64_INS_LDURSH: -2,
    ARM64_INS_LDURSW: -4,
    ARM64_INS_LDRSW: -4,
    ARM64_INS_LDUR: None,
    ARM64_INS_LDR: None,
    # Load Register (unprivileged)
    ARM64_INS_LDTRB: 1,
    ARM64_INS_LDTRSB: -1,
    ARM64_INS_LDTRH: 2,
    ARM64_INS_LDTRSH: -2,
    ARM64_INS_LDTRSW: -4,
    ARM64_INS_LDTR: None,
    # Load Exclusive
    ARM64_INS_LDXRB: 1,
    ARM64_INS_LDXRH: 2,
    ARM64_INS_LDXR: None,
    # Load-Acquire
    ARM64_INS_LDARB: 1,
    ARM64_INS_LDARH: 2,
    ARM64_INS_LDAR: None,
}
# Maps single-register store instructions to the number of bytes written to memory.
# None means the write size is determined by the width of the source register.
AARCH64_SINGLE_STORE_INSTRUCTIONS: Dict[int, int | None] = {
    # Store Register / Store Register (unscaled)
    ARM64_INS_STRB: 1,
    ARM64_INS_STURB: 1,
    ARM64_INS_STRH: 2,
    ARM64_INS_STURH: 2,
    ARM64_INS_STUR: None,
    ARM64_INS_STR: None,
    # Store Register (unprivileged)
    ARM64_INS_STTRB: 1,
    ARM64_INS_STTRH: 2,
    ARM64_INS_STTR: None,
    # Store-Release
    ARM64_INS_STLRB: 1,
    ARM64_INS_STLRH: 2,
    ARM64_INS_STLR: None,
}
# Exclusive stores: the first operand receives the status result of the operation,
# so the stored value and the memory operand are the last two operands.
# As with the other store table, None means the write size follows the source register.
AARCH64_EXCLUSIVE_STORE_INSTRUCTIONS: Dict[int, int | None] = {
    # Store Exclusive
    ARM64_INS_STXRB: 1,
    ARM64_INS_STXRH: 2,
    ARM64_INS_STXR: None,
    # Store-Release Exclusive
    ARM64_INS_STLXRB: 1,
    ARM64_INS_STLXRH: 2,
    ARM64_INS_STLXR: None,
}
# Conditional-select family. These instructions carry a condition code that is
# evaluated against the flags register when deciding which result is produced.
CONDITIONAL_SELECT_INSTRUCTIONS = {
    # Base forms
    ARM64_INS_CSEL,
    ARM64_INS_CSINC,
    ARM64_INS_CSINV,
    ARM64_INS_CSNEG,
    # Set / increment / invert / negate variants
    ARM64_INS_CSET,
    ARM64_INS_CSETM,
    ARM64_INS_CINC,
    ARM64_INS_CINV,
    ARM64_INS_CNEG,
}
# Instructions that are annotated through the generic register-destination handler.
# Includes every conditional-select instruction plus the ones listed here.
AARCH64_EMULATED_ANNOTATIONS = CONDITIONAL_SELECT_INSTRUCTIONS | {
    # Sign / zero extension
    ARM64_INS_SXTB,
    ARM64_INS_SXTH,
    ARM64_INS_SXTW,
    ARM64_INS_UXTB,
    ARM64_INS_UXTH,
    ARM64_INS_UXTW,
    # Bit reversal and leading-bit counting
    ARM64_INS_RBIT,
    ARM64_INS_CLS,
    ARM64_INS_CLZ,
    # Bitfield insert / extract / move
    ARM64_INS_BFXIL,
    ARM64_INS_UBFIZ,
    ARM64_INS_UBFM,
    ARM64_INS_UBFX,
    ARM64_INS_SBFIZ,
    ARM64_INS_SBFM,
    ARM64_INS_SBFX,
    ARM64_INS_BFI,
    # Negate, byte reverse, bit clear
    ARM64_INS_NEG,
    ARM64_INS_NEGS,
    ARM64_INS_REV,
    ARM64_INS_BIC,
    ARM64_INS_BICS,
}
# Capstone shift type -> function implementing that shift.
# Every function is called as: fn(value, shift_amt, bit_width)
AARCH64_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = {
    ARM64_SFT_LSL: bit_math.logical_shift_left,
    ARM64_SFT_LSR: bit_math.logical_shift_right,
    ARM64_SFT_ASR: bit_math.arithmetic_shift_right,
    ARM64_SFT_ROR: bit_math.rotate_right,
}
# "Extend" operations - https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912
# Each one takes a number, extracts a byte, halfword, or word from it,
# and zero-extends (UXT*) or sign-extends (SXT*) the extracted value.
AARCH64_EXTEND_MAP: Dict[int, Callable[[int], int]] = {
    # Unsigned: keep only the low 8 / 16 / 32 bits
    ARM64_EXT_UXTB: lambda x: x & 0xFF,
    ARM64_EXT_UXTH: lambda x: x & 0xFFFF,
    ARM64_EXT_UXTW: lambda x: x & 0xFFFF_FFFF,
    # UXTX has no effect. It extracts 64-bits from a 64-bit register.
    ARM64_EXT_UXTX: lambda x: x,
    # Signed: reinterpret the low 8 / 16 / 32 / 64 bits as a signed value
    ARM64_EXT_SXTB: lambda x: bit_math.to_signed(x, 8),
    ARM64_EXT_SXTH: lambda x: bit_math.to_signed(x, 16),
    ARM64_EXT_SXTW: lambda x: bit_math.to_signed(x, 32),
    ARM64_EXT_SXTX: lambda x: bit_math.to_signed(x, 64),
}
# Binary math/logic instructions mapped to the operator symbol that is handed to
# the binary-operation annotator to separate the two source operands.
AARCH64_MATH_INSTRUCTIONS = {
    ARM64_INS_ADD: "+",
    ARM64_INS_ADDS: "+",
    ARM64_INS_SUB: "-",
    ARM64_INS_SUBS: "-",
    ARM64_INS_AND: "&",
    ARM64_INS_ANDS: "&",
    # ORR is a bitwise inclusive OR, so it must be rendered with "|" (not "&")
    ARM64_INS_ORR: "|",
    ARM64_INS_ASR: ">>s",
    ARM64_INS_ASRV: ">>s",
    ARM64_INS_EOR: "^",
    ARM64_INS_LSL: "<<",
    ARM64_INS_LSLV: "<<",
    ARM64_INS_LSR: ">>",
    ARM64_INS_LSRV: ">>",
    ARM64_INS_UDIV: "/",
    ARM64_INS_SDIV: "/",
    ARM64_INS_SMULH: "*",
    ARM64_INS_SMULL: "*",
    ARM64_INS_UMULH: "*",
    ARM64_INS_UMULL: "*",
    ARM64_INS_MUL: "*",
}
def resolve_condition(condition: int, cpsr: int) -> InstructionCondition:
    """
    Decide whether a condition code is satisfied by the given flags value.

    `condition` is a Capstone `ARM64_CC_*` constant. `cpsr` is the flags
    register value; the N, Z, C and V flags are read from bits 31, 30, 29
    and 28 respectively.

    Returns `InstructionCondition.TRUE` if the condition holds, otherwise
    `InstructionCondition.FALSE`. Unrecognized condition codes are treated
    as not satisfied.
    """
    negative, zero, carry, overflow = (bool((cpsr >> bit) & 1) for bit in (31, 30, 29, 28))

    unsigned_higher = carry and not zero
    signed_greater = (not zero) and negative == overflow

    satisfied_by_code = {
        # Capstone uses this code for the 'B' instruction, the unconditional branch
        ARM64_CC_INVALID: True,
        ARM64_CC_EQ: zero,
        ARM64_CC_NE: not zero,
        ARM64_CC_HS: carry,
        ARM64_CC_LO: not carry,
        ARM64_CC_MI: negative,
        ARM64_CC_PL: not negative,
        ARM64_CC_VS: overflow,
        ARM64_CC_VC: not overflow,
        ARM64_CC_HI: unsigned_higher,
        ARM64_CC_LS: not unsigned_higher,
        ARM64_CC_GE: negative == overflow,
        ARM64_CC_LT: negative != overflow,
        ARM64_CC_GT: signed_greater,
        ARM64_CC_LE: not signed_greater,
        ARM64_CC_AL: True,
        ARM64_CC_NV: True,
    }

    if satisfied_by_code.get(condition, False):
        return InstructionCondition.TRUE
    return InstructionCondition.FALSE
class DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant):
    """
    AArch64 specialization of the generic disassembly assistant.

    Provides AArch64-specific instruction annotations, evaluation of branch and
    conditional-select conditions, jump target resolution, and parsing of
    memory, immediate and register operands (including shift/extend modifiers).
    """

    def __init__(self, architecture: str) -> None:
        super().__init__(architecture)

        # Handlers for instructions that are not covered by the module-level
        # lookup tables consulted first in `_set_annotation_string`.
        self.annotation_handlers: Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = {
            # MOV
            ARM64_INS_MOV: self._common_move_annotator,
            # MOV WITH KEEP
            ARM64_INS_MOVK: self._common_generic_register_destination,
            # ADR
            ARM64_INS_ADR: self._common_generic_register_destination,
            # ADRP
            ARM64_INS_ADRP: self._handle_adrp,
            # CMP
            ARM64_INS_CMP: self._common_cmp_annotator_builder("cpsr", "-"),
            # CMN
            ARM64_INS_CMN: self._common_cmp_annotator_builder("cpsr", "+"),
            # TST (bitwise "and")
            ARM64_INS_TST: self._common_cmp_annotator_builder("cpsr", "&"),
            # CCMP (conditional compare)
            ARM64_INS_CCMP: self._common_cmp_annotator_builder("cpsr", ""),
            # CCMN
            ARM64_INS_CCMN: self._common_cmp_annotator_builder("cpsr", ""),
        }

    @override
    def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        """
        Dispatch the instruction to the annotator matching its category.

        The module-level tables (loads, stores, exclusive stores, math,
        emulated) are checked in that order; anything else is looked up in
        `self.annotation_handlers`, and silently ignored if there is no handler.
        """
        # Dispatch to the correct handler
        if instruction.id in AARCH64_SINGLE_LOAD_INSTRUCTIONS:
            target_reg_size = self._register_width(instruction, instruction.operands[0]) // 8
            # A `None` table entry means the read size follows the target register
            read_size = AARCH64_SINGLE_LOAD_INSTRUCTIONS[instruction.id] or target_reg_size

            self._common_load_annotator(
                instruction,
                emu,
                instruction.operands[1].before_value,
                abs(read_size),
                read_size < 0,  # Negative size indicates a signed read
                target_reg_size,
                instruction.operands[0].str,
                instruction.operands[1].str,
            )
        elif instruction.id in AARCH64_SINGLE_STORE_INSTRUCTIONS:
            self._common_store_annotator(
                instruction,
                emu,
                instruction.operands[1].before_value,
                instruction.operands[0].before_value,
                AARCH64_SINGLE_STORE_INSTRUCTIONS[instruction.id],
                instruction.operands[1].str,
            )
        elif instruction.id in AARCH64_EXCLUSIVE_STORE_INSTRUCTIONS:
            # The first operand is the status register, so the memory operand
            # and the stored value are indexed from the end.
            self._common_store_annotator(
                instruction,
                emu,
                instruction.operands[-1].before_value,
                instruction.operands[-2].before_value,
                AARCH64_EXCLUSIVE_STORE_INSTRUCTIONS[instruction.id],
                instruction.operands[-1].str,
            )
        elif instruction.id in AARCH64_MATH_INSTRUCTIONS:
            # The two source operands are always the last two operands
            self._common_binary_op_annotator(
                instruction,
                emu,
                instruction.operands[0],
                instruction.operands[-2].before_value,
                instruction.operands[-1].before_value,
                AARCH64_MATH_INSTRUCTIONS[instruction.id],
            )
        elif instruction.id in AARCH64_EMULATED_ANNOTATIONS:
            self._common_generic_register_destination(instruction, emu)
        else:
            self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu)

    def _handle_adrp(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        """
        Annotate ADRP by telescoping the address given by its second operand.

        No annotation is set if the destination operand has no string form or
        the address value is unknown.
        """
        result_operand, right = instruction.operands

        if result_operand.str and right.before_value is not None:
            address = right.before_value

            TELESCOPE_DEPTH = max(0, int(pwndbg.config.disasm_telescope_depth))

            addresses = self._telescope(address, TELESCOPE_DEPTH, instruction, emu)
            telescope = self._telescope_format_list(addresses, TELESCOPE_DEPTH, emu)

            instruction.annotation = register_assign(result_operand.str, telescope)

    @override
    def _condition(
        self, instruction: PwndbgInstruction, emu: Emulator
    ) -> pwndbg.aglib.disasm.arch.InstructionCondition:
        """
        Determine whether the instruction's condition is satisfied.

        Handles conditional `B`, `CBZ`/`CBNZ`, `TBZ`/`TBNZ` and the
        conditional-select family. Falls back to the parent implementation
        when the instruction is not one of those or when a required value
        (flags or operand) is unavailable.
        """
        # In ARM64, only branches have the conditional code in the instruction,
        # as opposed to ARM32 which allows most instructions to be conditional
        if instruction.id == ARM64_INS_B:
            # The B instruction can be made conditional by the condition codes
            if instruction.cs_insn.cc in (ARM64_CC_INVALID, ARM64_CC_AL):
                instruction.declare_conditional = False
            else:
                flags = super()._read_register_name(instruction, "cpsr", emu)

                if flags is not None:
                    return resolve_condition(instruction.cs_insn.cc, flags)

        elif instruction.id == ARM64_INS_CBNZ:
            op_val = instruction.operands[0].before_value
            return boolean_to_instruction_condition(op_val is not None and op_val != 0)

        elif instruction.id == ARM64_INS_CBZ:
            op_val = instruction.operands[0].before_value
            return boolean_to_instruction_condition(op_val is not None and op_val == 0)

        elif instruction.id == ARM64_INS_TBNZ:
            op_val, bit = (
                instruction.operands[0].before_value,
                instruction.operands[1].before_value,
            )

            if op_val is not None and bit is not None:
                # Taken when the tested bit is set
                return boolean_to_instruction_condition(bool((op_val >> bit) & 1))

        elif instruction.id == ARM64_INS_TBZ:
            op_val, bit = (
                instruction.operands[0].before_value,
                instruction.operands[1].before_value,
            )

            if op_val is not None and bit is not None:
                # Taken when the tested bit is clear
                return boolean_to_instruction_condition(not ((op_val >> bit) & 1))

        elif instruction.id in CONDITIONAL_SELECT_INSTRUCTIONS:
            # Capstone places the condition to be satisfied in the `cc` field of the instruction
            # for all conditional select instructions
            flags = self._read_register_name(instruction, "cpsr", emu)

            if flags is not None:
                return resolve_condition(instruction.cs_insn.cc, flags)

        return super()._condition(instruction, emu)

    @override
    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
        """
        Return the jump target of a branch instruction, or None if the
        instruction is not in any jump group.
        """
        if not bool(instruction.groups & ALL_JUMP_GROUPS):
            return None

        if len(instruction.operands) > 0:
            # For all AArch64 branches, the target is either an immediate or a register and is the last operand
            return instruction.operands[-1].before_value
        elif instruction.id == ARM64_INS_RET:
            # If this is a ret WITHOUT an operand, it means we should read from the LR/x30 register
            return super()._read_register_name(instruction, "lr", emu)

        return super()._resolve_target(instruction, emu)

    @override
    def _parse_memory(
        self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
    ) -> int | None:
        """
        Parse the `Arm64OpMem` Capstone object to determine the concrete memory address used.

        Returns None if the base or index register value cannot be read.

        Three types of AArch64 memory operands:
            1. Register base with optional immediate offset
            Examples:
                  ldrb   w3, [x2]
                  str    x1, [x2, #0xb58]
                  ldr    x4,[x3], 4
            2. Register + another register with an optional shift
            Examples:
                  ldrb   w1, [x9, x2]
                  str    x1, [x2, x0, lsl #3]
            3. Register + 32-bit register extended and shifted.
               The shift in this case is implicitly a LSL
            Examples:
                  ldr    x1, [x2, w22, UXTW #3]
        """
        # All memory operands have `base` defined
        base = self._read_register(instruction, op.mem.base, emu)
        if base is None:
            return None

        target = base + op.mem.disp

        # If there is an index register
        if op.mem.index != 0:
            index = self._read_register(instruction, op.mem.index, emu)
            if index is None:
                return None

            # Optionally apply an extend to the index register
            if op.cs_op.ext != 0:
                index = AARCH64_EXTEND_MAP[op.cs_op.ext](index)

            # Optionally apply shift to the index register
            # This handles shifts in the extend operation as well:
            # As in the case of `ldr x1, [x2, w22, UXTW #3]`,
            # Capstone will automatically make the shift a LSL and set the value to 3
            if op.cs_op.shift.type != 0:
                # The form of instructions with a shift always apply the shift to a 64-bit value
                index = AARCH64_BIT_SHIFT_MAP[op.cs_op.shift.type](index, op.cs_op.shift.value, 64)

            target += index

        return target

    def _register_width(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> int:
        """
        Return the bit width of a register operand: 32 if its Capstone
        register name starts with "w", otherwise 64.
        """
        return 32 if instruction.cs_insn.reg_name(op.reg)[0] == "w" else 64

    @override
    def _parse_immediate(self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator):
        """
        In AArch64, there can be an optional shift applied to constants, typically only a `LSL #12`

        Ex:
            cmp    x8, #1, lsl #12      (1 << 12)
        """
        target = op.imm
        if target is None:
            return None

        if op.cs_op.shift.type != 0:
            target = AARCH64_BIT_SHIFT_MAP[op.cs_op.shift.type](target, op.cs_op.shift.value, 64)

        return target

    @override
    def _parse_register(
        self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
    ) -> int | None:
        """
        Register operands can have optional extend and shift modifiers.

        Ex:
            cmp x5, x3, LSL #12         (x3 << 12)
            cmp x5, w3, SXTB 4          (Signed extend byte, then left shift 4)

        The extend operation is always applied first (if present), and then shifts take effect.

        Returns None if the register value cannot be determined.
        """
        target = super()._parse_register(instruction, op, emu)
        if target is None:
            return None

        # The shift and sign-extend operations depend on the target bit width.
        # This is sometimes implicit in the target register size, which is always
        # the first operand.
        target_bit_width = (
            self._register_width(instruction, instruction.operands[0])
            if instruction.operands[0].type == CS_OP_REG
            else 64
        )
        # Results are truncated to the target width after each step
        width_mask = (1 << target_bit_width) - 1

        if op.cs_op.ext != 0:
            target = AARCH64_EXTEND_MAP[op.cs_op.ext](target) & width_mask

        if op.cs_op.shift.type != 0:
            target = (
                AARCH64_BIT_SHIFT_MAP[op.cs_op.shift.type](
                    target, op.cs_op.shift.value, target_bit_width
                )
                & width_mask
            )

        return target
# Module-level assistant instance, constructed for the "aarch64" architecture at import time.
assistant = DisassemblyAssistant("aarch64")