@dataclass
class Reg:
    """
    Static description of a single register, or of a subregister nested inside one.

    Top-level registers list the narrower views that alias their bits in
    `subregisters`; each of those is itself a `Reg` whose `offset` locates it
    inside the enclosing register.
    """

    # Name used to refer to the register, e.g. "rax" or "w0".
    name: str
    # Width in bytes; None defers to the architecture pointer size.
    size: int | None = None
    """Register width in bytes. None if the register size is arch.ptrsize"""
    # Byte offset within the enclosing register (consumers multiply it by 8 to get a bit offset).
    offset: int = 0
    """Relevant for subregisters - the offset of this register in the main register"""
    # True when a write to this subregister also clears the rest of the enclosing register.
    zero_extend_writes: bool = False
    """Upon writing a value to this subregister, are the higher bits of the full register zeroed out?"""
    # Narrower registers that alias part of this register's bits.
    subregisters: tuple[Reg, ...] = ()
= (), args: Tuple[str, ...] = (), retval: str | None = None, ) -> None: - self.pc = pc - self.stack = stack - self.frame = frame - self.retaddr = retaddr + self.pc = pc.name + self.stack = stack.name + self.frame = frame.name if frame else None + self.retaddr = tuple(x.name for x in retaddr) self.flags = flags self.extra_flags = extra_flags - self.gpr = gpr + self.gpr = tuple(x.name for x in gpr) self.misc = misc self.args = args self.retval = retval + all_subregisters: List[str] = [] + + self.reg_definitions = {} + self.full_register_lookup = {} + for reg in itertools.chain(gpr, (stack, frame, pc), retaddr): + if reg: + self.reg_definitions[reg.name] = reg + self.full_register_lookup[reg.name] = reg + for subregister in reg.subregisters: + self.reg_definitions[subregister.name] = subregister + self.full_register_lookup[subregister.name] = reg + all_subregisters.append(subregister.name) + # In 'common', we don't want to lose the ordering of: self.common = [] - for reg in gpr + (frame, stack, pc) + tuple(flags): - if reg and reg not in self.common: - self.common.append(reg) + for regname in itertools.chain( + self.gpr, (self.frame, self.stack, self.pc), tuple(self.flags) + ): + if regname and regname not in self.common: + self.common.append(regname) # The specific order of this list is very important: # Due to the behavior of Arm in the Unicorn engine, # we must write the flags register after PC, and the stack pointer after the flags register. 
# Otherwise, the values will be clobbered # https://github.com/pwndbg/pwndbg/pull/2337 - self.emulated_regs_order: List[EmulatedRegister] = [] + self.emulated_regs_order: List[UnicornRegisterWrite] = [] - for reg in [pc] + list(flags) + [stack, frame] + list(retaddr) + list(misc) + list(gpr): - if reg and reg not in self.emulated_regs_order: - emu_reg = EmulatedRegister(reg, True if reg in flags else False) + for regname in itertools.chain( + (self.pc,), + tuple(self.flags), + (self.stack, self.frame), + self.retaddr, + self.misc, + self.gpr, + ): + if regname and regname not in self.emulated_regs_order: + emu_reg = UnicornRegisterWrite(regname, True if regname in flags else False) self.emulated_regs_order.append(emu_reg) - self.all = set(misc) | set(flags) | set(extra_flags) | set(self.retaddr) | set(self.common) + self.all = ( + set(self.misc) + | set(self.flags) + | set(self.extra_flags) + | set(self.retaddr) + | set(self.common) + | set(all_subregisters) + ) self.all -= {None} self.all |= {"pc", "sp"} @@ -116,6 +166,173 @@ class RegisterSet: yield from self.all +class PsuedoEmulatedRegisterFile: + """ + This class represents a set of registers that can be written, read, and invalidated. + + The aim is to allow some manual dynamic/static analysis without the need for a full emulator. + + The implementation can handle the behavior of architectures with partial registers, + such as x86 (Ex: rax has "eax", "ax", "ah", and "al" as subregisters) or AArch64 (Ex: X0 contains W0). + Most of the complexity of the bitshifts and masks arise from the necessity to handle these cases. + """ + + masks: defaultdict[str, int] + """ + Map of register name to bitmask indicating what bits of the register we know the value of. + + Example: + { + "rax": 0xFFFF + } + This indicates that in the RAX register, we only know the bottom 16 bits. This likely resulted from writing the "ax" register. + Any attempt to read any other bits returns None. 
In this case, we can read from "ax", "ah", and "al", but not "eax" or "rax". + """ + + values: defaultdict[str, int] + """ + Map of register to the value we know it to have. + """ + + register_set: RegisterSet + ptrsize: int + + def __init__(self, register_set: RegisterSet, ptrsize: int): + self.register_set = register_set + self.ptrsize = ptrsize + + self.masks = defaultdict(int) + self.values = defaultdict(int) + + def write_register( + self, reg: str, value: int, source_width: int | None = None, sign_extend: bool = False + ) -> None: + """ + source_width is the byte width of the value's source. + It should be specified when the source has a width shorter than the destination register. + + Examples: + movsbl EAX, AL // sign extend 1 byte register to 4 byte register + movzbl EAX, AL // zero extend + + Source width would be 1, and in the first case sign_extend should be set to True. + If sign_extend is False, we zero extend. + """ + # Definition of the register we are writing + write_reg_def = self.register_set.reg_definitions.get(reg) + if write_reg_def is None: + return None + + register_bit_offset = write_reg_def.offset * 8 + written_register_size = ( + write_reg_def.size if write_reg_def.size is not None else self.ptrsize + ) + written_register_mask = (1 << (written_register_size * 8)) - 1 + + # Definition of the "full" register that the written register resides in. Might be itself. + full_reg_def = self.register_set.full_register_lookup[reg] + + # Handle zero / sign-extension + if source_width is not None: + # Ensure that if value is negative, it is converted to it's unsigned representation + value &= (1 << (source_width * 8)) - 1 + + # Sign-extend the value to the write_size + if sign_extend: + value = bit_math.to_signed(value, source_width * 8) & written_register_mask + + # Bitmask of the register positioned in the full register. Ex: ah register is bits [15-8] in RAX. 
+ value_mask = written_register_mask << register_bit_offset + + # The bits we will place into the register + written_bits = (value << register_bit_offset) & value_mask + + if write_reg_def.zero_extend_writes: + full_reg_size = full_reg_def.size if full_reg_def.size is not None else self.ptrsize + full_reg_mask = (1 << (full_reg_size * 8)) - 1 + # Bitmask indicating the bits that this write is setting. + overriden_bits_mask = full_reg_mask + else: + overriden_bits_mask = value_mask + + # Clear bits of current value where new value is being written. + value_masked_for_placement = self.values[full_reg_def.name] & ~overriden_bits_mask + + self.masks[full_reg_def.name] = overriden_bits_mask | self.masks[full_reg_def.name] + self.values[full_reg_def.name] = written_bits | value_masked_for_placement + + def read_register(self, reg: str) -> int | None: + # Definition of the register we are reading + write_reg_def = self.register_set.reg_definitions.get(reg) + if write_reg_def is None: + return None + + register_bit_offset = write_reg_def.offset * 8 + written_register_size = ( + write_reg_def.size if write_reg_def.size is not None else self.ptrsize + ) + written_register_mask = (1 << (written_register_size * 8)) - 1 + + # Definition of the "full" register that the read register resides in. Might be itself. + full_reg_def = self.register_set.full_register_lookup[reg] + + mask = self.masks[full_reg_def.name] + if mask == 0: + return None + + read_mask = written_register_mask << register_bit_offset + + if mask & read_mask != read_mask: + # Not all of the bits that we are attempting to read are readable. + return None + + return (self.values[full_reg_def.name] & read_mask) >> register_bit_offset + + def invalidate_all_registers(self) -> None: + self.masks.clear() + + def invalidate_register(self, reg: str) -> None: + """ + Invalidate the bits that a write to this register would override. 
+ + This can be used when we statically detect that a register is written, but + we don't know the concrete value that is written so we have to invalidate any current + knowledge of the register's bits. + """ + # Definition of the register we are invalidating + written_reg_def = self.register_set.reg_definitions.get(reg) + if written_reg_def is None: + return None + + register_bit_offset = written_reg_def.offset * 8 + written_register_size = ( + written_reg_def.size if written_reg_def.size is not None else self.ptrsize + ) + written_register_mask = (1 << (written_register_size * 8)) - 1 + + # Definition of the "full" register that the written register resides in. Might be itself. + full_reg_def = self.register_set.full_register_lookup[reg] + + value_mask = written_register_mask << register_bit_offset + + if written_reg_def.zero_extend_writes: + full_reg_size = full_reg_def.size if full_reg_def.size is not None else self.ptrsize + full_reg_mask = (1 << (full_reg_size * 8)) - 1 + new_mask = full_reg_mask + else: + new_mask = value_mask + + self.masks[full_reg_def.name] = ~new_mask & self.masks[full_reg_def.name] + + def __repr__(self): + return str( + { + "masks": {x: hex(y) for x, y in self.masks.items()}, + "values": {x: hex(y) for x, y in self.values.items()}, + } + ) + + arm_cpsr_flags = BitFlags( [ ("N", 31), @@ -204,25 +421,53 @@ aarch64_scr_flags = BitFlags( ) arm = RegisterSet( - retaddr=("lr",), + retaddr=(Reg("lr", 4),), flags={"cpsr": arm_cpsr_flags}, - gpr=("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"), + gpr=( + Reg("r0", 4), + Reg("r1", 4), + Reg("r2", 4), + Reg("r3", 4), + Reg("r4", 4), + Reg("r5", 4), + Reg("r6", 4), + Reg("r7", 4), + Reg("r8", 4), + Reg("r9", 4), + Reg("r10", 4), + Reg("r11", 4), + Reg("r12", 4), + ), args=("r0", "r1", "r2", "r3"), retval="r0", ) # ARM Cortex-M armcm = RegisterSet( - retaddr=("lr",), + retaddr=(Reg("lr", 4),), flags={"xpsr": arm_xpsr_flags}, - gpr=("r0", "r1", "r2", "r3", "r4", 
"r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"), + gpr=( + Reg("r0", 4), + Reg("r1", 4), + Reg("r2", 4), + Reg("r3", 4), + Reg("r4", 4), + Reg("r5", 4), + Reg("r6", 4), + Reg("r7", 4), + Reg("r8", 4), + Reg("r9", 4), + Reg("r10", 4), + Reg("r11", 4), + Reg("r12", 4), + ), args=("r0", "r1", "r2", "r3"), retval="r0", ) # AArch64 has a PSTATE register, but GDB represents it as the CPSR register aarch64 = RegisterSet( - retaddr=("lr",), + retaddr=(Reg("lr", 8),), flags={"cpsr": aarch64_cpsr_flags}, extra_flags={ "scr_el3": aarch64_scr_flags, @@ -239,72 +484,42 @@ aarch64 = RegisterSet( # pointer semantics are required for other functionalities. # frame = 'x29', gpr=( - "x0", - "x1", - "x2", - "x3", - "x4", - "x5", - "x6", - "x7", - "x8", - "x9", - "x10", - "x11", - "x12", - "x13", - "x14", - "x15", - "x16", - "x17", - "x18", - "x19", - "x20", - "x21", - "x22", - "x23", - "x24", - "x25", - "x26", - "x27", - "x28", - "x29", - ), - misc=( - "w0", - "w1", - "w2", - "w3", - "w4", - "w5", - "w6", - "w7", - "w8", - "w9", - "w10", - "w11", - "w12", - "w13", - "w14", - "w15", - "w16", - "w17", - "w18", - "w19", - "w20", - "w21", - "w22", - "w23", - "w24", - "w25", - "w26", - "w27", - "w28", + Reg("x0", 8, subregisters=(Reg("w0", 4, zero_extend_writes=True),)), + Reg("x1", 8, subregisters=(Reg("w1", 4, zero_extend_writes=True),)), + Reg("x2", 8, subregisters=(Reg("w2", 4, zero_extend_writes=True),)), + Reg("x3", 8, subregisters=(Reg("w3", 4, zero_extend_writes=True),)), + Reg("x4", 8, subregisters=(Reg("w4", 4, zero_extend_writes=True),)), + Reg("x5", 8, subregisters=(Reg("w5", 4, zero_extend_writes=True),)), + Reg("x6", 8, subregisters=(Reg("w6", 4, zero_extend_writes=True),)), + Reg("x7", 8, subregisters=(Reg("w7", 4, zero_extend_writes=True),)), + Reg("x8", 8, subregisters=(Reg("w8", 4, zero_extend_writes=True),)), + Reg("x9", 8, subregisters=(Reg("w9", 4, zero_extend_writes=True),)), + Reg("x10", 8, subregisters=(Reg("w10", 4, zero_extend_writes=True),)), + Reg("x11", 8, 
subregisters=(Reg("w11", 4, zero_extend_writes=True),)), + Reg("x12", 8, subregisters=(Reg("w12", 4, zero_extend_writes=True),)), + Reg("x13", 8, subregisters=(Reg("w13", 4, zero_extend_writes=True),)), + Reg("x14", 8, subregisters=(Reg("w14", 4, zero_extend_writes=True),)), + Reg("x15", 8, subregisters=(Reg("w15", 4, zero_extend_writes=True),)), + Reg("x16", 8, subregisters=(Reg("w16", 4, zero_extend_writes=True),)), + Reg("x17", 8, subregisters=(Reg("w17", 4, zero_extend_writes=True),)), + Reg("x18", 8, subregisters=(Reg("w18", 4, zero_extend_writes=True),)), + Reg("x19", 8, subregisters=(Reg("w19", 4, zero_extend_writes=True),)), + Reg("x20", 8, subregisters=(Reg("w20", 4, zero_extend_writes=True),)), + Reg("x21", 8, subregisters=(Reg("w21", 4, zero_extend_writes=True),)), + Reg("x22", 8, subregisters=(Reg("w22", 4, zero_extend_writes=True),)), + Reg("x23", 8, subregisters=(Reg("w23", 4, zero_extend_writes=True),)), + Reg("x24", 8, subregisters=(Reg("w24", 4, zero_extend_writes=True),)), + Reg("x25", 8, subregisters=(Reg("w25", 4, zero_extend_writes=True),)), + Reg("x26", 8, subregisters=(Reg("w26", 4, zero_extend_writes=True),)), + Reg("x27", 8, subregisters=(Reg("w27", 4, zero_extend_writes=True),)), + Reg("x28", 8, subregisters=(Reg("w28", 4, zero_extend_writes=True),)), + Reg("x29", 8, subregisters=(Reg("w29", 4, zero_extend_writes=True),)), ), args=("x0", "x1", "x2", "x3"), retval="x0", ) + x86flags = { "eflags": BitFlags( [("CF", 0), ("PF", 2), ("AF", 4), ("ZF", 6), ("SF", 7), ("IF", 9), ("DF", 10), ("OF", 11)] @@ -312,25 +527,149 @@ x86flags = { } amd64 = RegisterSet( - pc="rip", - stack="rsp", - frame="rbp", + pc=Reg("rip"), + stack=Reg( + "rsp", + 8, + subregisters=(Reg("esp", 4, 0, zero_extend_writes=True), Reg("sp", 2, 0), Reg("spl", 1, 0)), + ), + frame=Reg( + "rbp", + 8, + subregisters=(Reg("ebp", 4, 0, zero_extend_writes=True), Reg("bp", 2, 0), Reg("bpl", 1, 0)), + ), flags=x86flags, gpr=( - "rax", - "rbx", - "rcx", - "rdx", - "rdi", - "rsi", - 
"r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", + Reg( + "rax", + 8, + subregisters=( + Reg("eax", 4, 0, zero_extend_writes=True), + Reg("ax", 2, 0), + Reg("ah", 1, 1), + Reg("al", 1, 0), + ), + ), + Reg( + "rbx", + 8, + subregisters=( + Reg("ebx", 4, 0, zero_extend_writes=True), + Reg("bx", 2, 0), + Reg("bh", 1, 1), + Reg("bl", 1, 0), + ), + ), + Reg( + "rcx", + 8, + subregisters=( + Reg("ecx", 4, 0, zero_extend_writes=True), + Reg("cx", 2, 0), + Reg("ch", 1, 1), + Reg("cl", 1, 0), + ), + ), + Reg( + "rdx", + 8, + subregisters=( + Reg("edx", 4, 0, zero_extend_writes=True), + Reg("dx", 2, 0), + Reg("dh", 1, 1), + Reg("dl", 1, 0), + ), + ), + Reg( + "rdi", + 8, + subregisters=( + Reg("edi", 4, 0, zero_extend_writes=True), + Reg("di", 2, 0), + Reg("dil", 1, 0), + ), + ), + Reg( + "rsi", + 8, + subregisters=( + Reg("esi", 4, 0, zero_extend_writes=True), + Reg("si", 2, 0), + Reg("sil", 1, 0), + ), + ), + Reg( + "r8", + 8, + subregisters=( + Reg("r8d", 4, 0, zero_extend_writes=True), + Reg("r8w", 2, 0), + Reg("r8b", 1, 0), + ), + ), + Reg( + "r9", + 8, + subregisters=( + Reg("r9d", 4, 0, zero_extend_writes=True), + Reg("r9w", 2, 0), + Reg("r9b", 1, 0), + ), + ), + Reg( + "r10", + 8, + subregisters=( + Reg("r10d", 4, 0, zero_extend_writes=True), + Reg("r10w", 2, 0), + Reg("r10b", 1, 0), + ), + ), + Reg( + "r11", + 8, + subregisters=( + Reg("r11d", 4, 0, zero_extend_writes=True), + Reg("r11w", 2, 0), + Reg("r11b", 1, 0), + ), + ), + Reg( + "r12", + 8, + subregisters=( + Reg("r12d", 4, 0, zero_extend_writes=True), + Reg("r12w", 2, 0), + Reg("r12b", 1, 0), + ), + ), + Reg( + "r13", + 8, + subregisters=( + Reg("r13d", 4, 0, zero_extend_writes=True), + Reg("r13w", 2, 0), + Reg("r13b", 1, 0), + ), + ), + Reg( + "r14", + 8, + subregisters=( + Reg("r14d", 4, 0, zero_extend_writes=True), + Reg("r14w", 2, 0), + Reg("r14b", 1, 0), + ), + ), + Reg( + "r15", + 8, + subregisters=( + Reg("r15d", 4, 0, zero_extend_writes=True), + Reg("r15w", 2, 0), + Reg("r15b", 1, 
0), + ), + ), ), misc=( "cs", @@ -341,26 +680,6 @@ amd64 = RegisterSet( "gs", "fsbase", "gsbase", - "ax", - "ah", - "al", - "bx", - "bh", - "bl", - "cx", - "ch", - "cl", - "dx", - "dh", - "dl", - "dil", - "sil", - "spl", - "bpl", - "di", - "si", - "bp", - "sp", "ip", ), args=("rdi", "rsi", "rdx", "rcx", "r8", "r9"), @@ -368,11 +687,42 @@ amd64 = RegisterSet( ) i386 = RegisterSet( - pc="eip", - stack="esp", - frame="ebp", + pc=Reg("eip"), + stack=Reg("esp", 4, subregisters=(Reg("sp", 2, 0),)), + frame=Reg("ebp", 4, subregisters=(Reg("bp", 2, 0),)), flags=x86flags, - gpr=("eax", "ebx", "ecx", "edx", "edi", "esi"), + gpr=( + Reg( + "eax", + 4, + subregisters=(Reg("ax", 2, 0), Reg("ah", 1, 1), Reg("al", 1, 0)), + ), + Reg( + "ebx", + 4, + subregisters=(Reg("bx", 2, 0), Reg("bh", 1, 1), Reg("bl", 1, 0)), + ), + Reg( + "ecx", + 4, + subregisters=(Reg("cx", 2, 0), Reg("ch", 1, 1), Reg("cl", 1, 0)), + ), + Reg( + "edx", + 4, + subregisters=(Reg("dx", 2, 0), Reg("dh", 1, 1), Reg("dl", 1, 0)), + ), + Reg( + "edi", + 4, + subregisters=(Reg("di", 2, 0),), + ), + Reg( + "esi", + 4, + subregisters=(Reg("si", 2, 0),), + ), + ), misc=( "cs", "ss", @@ -382,27 +732,12 @@ i386 = RegisterSet( "gs", "fsbase", "gsbase", - "ax", - "ah", - "al", - "bx", - "bh", - "bl", - "cx", - "ch", - "cl", - "dx", - "dh", - "dl", - "di", - "si", - "bp", - "sp", "ip", ), retval="eax", ) + # http://math-atlas.sourceforge.net/devel/assembly/elfspec_ppc.pdf # r0 Volatile register which may be modified during function linkage # r1 Stack frame pointer, always valid @@ -414,43 +749,43 @@ i386 = RegisterSet( # r14-r30 Registers used for local variables # r31 Used for local variables or "environment pointers" powerpc = RegisterSet( - retaddr=("lr",), + retaddr=(Reg("lr"),), flags={"msr": BitFlags(), "xer": BitFlags()}, gpr=( - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", - "r16", - "r17", - "r18", - "r19", - "r20", - "r21", - 
"r22", - "r23", - "r24", - "r25", - "r26", - "r27", - "r28", - "r29", - "r30", - "r31", - "cr", - "ctr", + Reg("r0"), + Reg("r1"), + Reg("r2"), + Reg("r3"), + Reg("r4"), + Reg("r5"), + Reg("r6"), + Reg("r7"), + Reg("r8"), + Reg("r9"), + Reg("r10"), + Reg("r11"), + Reg("r12"), + Reg("r13"), + Reg("r14"), + Reg("r15"), + Reg("r16"), + Reg("r17"), + Reg("r18"), + Reg("r19"), + Reg("r20"), + Reg("r21"), + Reg("r22"), + Reg("r23"), + Reg("r24"), + Reg("r25"), + Reg("r26"), + Reg("r27"), + Reg("r28"), + Reg("r29"), + Reg("r30"), + Reg("r31"), + Reg("cr"), + Reg("ctr"), ), args=("r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"), retval="r3", @@ -483,39 +818,39 @@ powerpc = RegisterSet( # ____________________________________/ sparc = RegisterSet( - stack="sp", - frame="fp", - retaddr=("i7",), + stack=Reg("sp"), + frame=Reg("fp"), + retaddr=(Reg("i7"),), flags={"psr": BitFlags()}, gpr=( - "g1", - "g2", - "g3", - "g4", - "g5", - "g6", - "g7", - "o0", - "o1", - "o2", - "o3", - "o4", - "o5", - "o7", - "l0", - "l1", - "l2", - "l3", - "l4", - "l5", - "l6", - "l7", - "i0", - "i1", - "i2", - "i3", - "i4", - "i5", + Reg("g1"), + Reg("g2"), + Reg("g3"), + Reg("g4"), + Reg("g5"), + Reg("g6"), + Reg("g7"), + Reg("o0"), + Reg("o1"), + Reg("o2"), + Reg("o3"), + Reg("o4"), + Reg("o5"), + Reg("o7"), + Reg("l0"), + Reg("l1"), + Reg("l2"), + Reg("l3"), + Reg("l4"), + Reg("l5"), + Reg("l6"), + Reg("l7"), + Reg("i0"), + Reg("i1"), + Reg("i2"), + Reg("i3"), + Reg("i4"), + Reg("i5"), ), args=("i0", "i1", "i2", "i3", "i4", "i5"), retval="o0", @@ -535,35 +870,35 @@ sparc = RegisterSet( # r30 => frame pointer # r31 => return address mips = RegisterSet( - frame="fp", - retaddr=("ra",), + frame=Reg("fp"), + retaddr=(Reg("ra"),), gpr=( - "v0", - "v1", - "a0", - "a1", - "a2", - "a3", - "t0", - "t1", - "t2", - "t3", - "t4", - "t5", - "t6", - "t7", - "t8", - "t9", - "s0", - "s1", - "s2", - "s3", - "s4", - "s5", - "s6", - "s7", - "s8", - "gp", + Reg("v0"), + Reg("v1"), + Reg("a0"), + Reg("a1"), + 
Reg("a2"), + Reg("a3"), + Reg("t0"), + Reg("t1"), + Reg("t2"), + Reg("t3"), + Reg("t4"), + Reg("t5"), + Reg("t6"), + Reg("t7"), + Reg("t8"), + Reg("t9"), + Reg("s0"), + Reg("s1"), + Reg("s2"), + Reg("s3"), + Reg("s4"), + Reg("s5"), + Reg("s6"), + Reg("s7"), + Reg("s8"), + Reg("gp"), ), args=("a0", "a1", "a2", "a3"), retval="v0", @@ -592,39 +927,39 @@ mips = RegisterSet( # f18–27 => fs2–11 (FP saved registers) # f28–31 => ft8–11 (FP temporaries) riscv = RegisterSet( - pc="pc", - stack="sp", - retaddr=("ra",), + pc=Reg("pc"), + stack=Reg("sp"), + retaddr=(Reg("ra"),), gpr=( - "gp", - "tp", - "t0", - "t1", - "t2", - "s0", - "s1", - "a0", - "a1", - "a2", - "a3", - "a4", - "a5", - "a6", - "a7", - "s2", - "s3", - "s4", - "s5", - "s6", - "s7", - "s8", - "s9", - "s10", - "s11", - "t3", - "t4", - "t5", - "t6", + Reg("gp"), + Reg("tp"), + Reg("t0"), + Reg("t1"), + Reg("t2"), + Reg("s0"), + Reg("s1"), + Reg("a0"), + Reg("a1"), + Reg("a2"), + Reg("a3"), + Reg("a4"), + Reg("a5"), + Reg("a6"), + Reg("a7"), + Reg("s2"), + Reg("s3"), + Reg("s4"), + Reg("s5"), + Reg("s6"), + Reg("s7"), + Reg("s8"), + Reg("s9"), + Reg("s10"), + Reg("s11"), + Reg("t3"), + Reg("t4"), + Reg("t5"), + Reg("t6"), ), args=("a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7"), # TODO: make retval a tuple @@ -634,37 +969,37 @@ riscv = RegisterSet( # https://docs.kernel.org/arch/loongarch/introduction.html loongarch64 = RegisterSet( - pc="pc", - stack="sp", - frame="fp", - retaddr=("ra",), + pc=Reg("pc"), + stack=Reg("sp"), + frame=Reg("fp"), + retaddr=(Reg("ra"),), gpr=( - "a0", - "a1", - "a2", - "a3", - "a4", - "a5", - "a6", - "a7", - "t0", - "t1", - "t2", - "t3", - "t4", - "t5", - "t6", - "t7", - "t8", - "s0", - "s1", - "s2", - "s3", - "s4", - "s5", - "s6", - "s7", - "s8", + Reg("a0"), + Reg("a1"), + Reg("a2"), + Reg("a3"), + Reg("a4"), + Reg("a5"), + Reg("a6"), + Reg("a7"), + Reg("t0"), + Reg("t1"), + Reg("t2"), + Reg("t3"), + Reg("t4"), + Reg("t5"), + Reg("t6"), + Reg("t7"), + Reg("t8"), + Reg("s0"), + 
from __future__ import annotations

from pwndbg.lib.regs import PsuedoEmulatedRegisterFile
from pwndbg.lib.regs import aarch64
from pwndbg.lib.regs import amd64
from pwndbg.lib.regs import mips


def _expect_reads(regfile, **expected):
    """
    Read each named register, in the order given, and compare against the expected value.

    An expected value of None means the register must read back as unknown.
    """
    for name, want in expected.items():
        got = regfile.read_register(name)
        if want is None:
            assert got is None, name
        else:
            assert got == want, name


def test_emulated_register_set_amd64():
    """
    These tests check that register writes and reads are implemented correctly.

    RAX = 64-bits
    EAX = low 32-bits // All writes to EAX will zero out the high 32-bits of RAX
    AX = low 16-bits
    AH = top half of AX
    AL = bottom half of AX
    """
    regfile = PsuedoEmulatedRegisterFile(amd64, 8)

    regfile.write_register("rax", -1)
    _expect_reads(
        regfile, al=0xFF, ah=0xFF, ax=0xFFFF, eax=0xFFFF_FFFF, rax=0xFFFF_FFFF_FFFF_FFFF
    )

    regfile.invalidate_all_registers()
    regfile.write_register("al", 0b1111)
    _expect_reads(regfile, al=0b1111, ah=None, ax=None, eax=None, rax=None)

    regfile.invalidate_all_registers()
    regfile.write_register("eax", 0xFFFF_AABB)
    # The 32-bit write here zero-extends to the entire register so we can read RAX
    _expect_reads(
        regfile, al=0xBB, ah=0xAA, ax=0xAABB, eax=0xFFFF_AABB, rax=0x0000_0000_FFFF_AABB
    )

    regfile.invalidate_all_registers()
    regfile.write_register("ah", 0x11)
    _expect_reads(regfile, al=None, ah=0x11, ax=None, eax=None, rax=None)

    regfile.invalidate_all_registers()
    regfile.write_register("ax", 0x1234)
    _expect_reads(regfile, al=0x34, ah=0x12, ax=0x1234, eax=None, rax=None)


def test_emulated_register_set_amd64_more():
    regfile = PsuedoEmulatedRegisterFile(amd64, 8)

    # Unwritten value should return None
    _expect_reads(regfile, rbx=None)

    regfile.write_register("ah", 0x22)
    regfile.write_register("al", 0x11)
    _expect_reads(regfile, al=0x11, ah=0x22, ax=0x2211, eax=None, rax=None)

    regfile.write_register("eax", 0xFF00_0000)
    # Writes to 32-bit registers zero-extend to the entire register
    _expect_reads(regfile, al=0x00, ah=0x00, ax=0x0000, eax=0xFF00_0000, rax=0xFF00_0000)

    regfile.write_register("al", 0x01)
    _expect_reads(regfile, al=0x01, ah=0x00, ax=0x0001, eax=0xFF00_0001, rax=0xFF00_0001)

    regfile.write_register("rax", 0x01)
    _expect_reads(regfile, al=0x01, ah=0x00, ax=0x0001, eax=0x0000_0001, rax=0x0000_0001)

    regfile.invalidate_all_registers()
    regfile.write_register("rax", -1)
    regfile.write_register("eax", -1)
    # The 32-bit write here zero-extends to the entire register.
    _expect_reads(regfile, eax=0xFFFF_FFFF, rax=0xFFFF_FFFF)

    regfile.invalidate_all_registers()
    _expect_reads(regfile, al=None, ah=None, ax=None, eax=None, rax=None)

    regfile.write_register("ah", -1)
    _expect_reads(regfile, al=None, ah=0xFF, ax=None, eax=None, rax=None)

    regfile.write_register("bh", 0xFF)
    _expect_reads(regfile, bl=None, bh=0xFF, bx=None, ebx=None, rbx=None)


def test_emulated_register_set_amd64_invalidate():
    regfile = PsuedoEmulatedRegisterFile(amd64, 8)

    regfile.write_register("rax", -1)
    regfile.invalidate_register("eax")
    _expect_reads(regfile, al=None, ah=None, ax=None, eax=None, rax=None)

    regfile.write_register("rax", -1)
    regfile.invalidate_register("al")
    # "ah" register doesn't overlap with "al"
    _expect_reads(regfile, al=None, ah=0xFF, ax=None, eax=None, rax=None)

    # Write a value back to al. All the other bits are still preserved
    regfile.write_register("al", 0xAA)
    _expect_reads(
        regfile, al=0xAA, ah=0xFF, ax=0xFFAA, eax=0xFFFF_FFAA, rax=0xFFFF_FFFF_FFFF_FFAA
    )

    regfile.invalidate_register("eax")
    regfile.write_register("eax", -1)
    _expect_reads(regfile, al=0xFF, ah=0xFF, ax=0xFFFF, eax=0xFFFF_FFFF, rax=0xFFFF_FFFF)


def test_emulate_register_file_amd64_sign_extension():
    regfile = PsuedoEmulatedRegisterFile(amd64, 8)

    # This will sign extend the value to EAX, since the top bit in 0xFF is 1.
    regfile.write_register("eax", 0xFF, source_width=1, sign_extend=True)
    _expect_reads(regfile, al=0xFF, ah=0xFF, ax=0xFFFF, eax=0xFFFF_FFFF, rax=0xFFFF_FFFF)

    regfile.write_register("eax", 0xFF, source_width=2, sign_extend=True)
    _expect_reads(regfile, al=0xFF, ah=0x00, ax=0x00FF, eax=0x0000_00FF, rax=0x0000_00FF)


def test_emulated_register_set_aarch64():
    regfile = PsuedoEmulatedRegisterFile(aarch64, 8)

    regfile.write_register("w0", 0xFFFF_AABB)
    # The 32-bit write here zero-extends to the entire register
    _expect_reads(regfile, x0=0x0000_0000_FFFF_AABB, w0=0xFFFF_AABB)

    regfile.write_register("x0", -1)
    _expect_reads(regfile, w0=0xFFFF_FFFF, x0=0xFFFF_FFFF_FFFF_FFFF)


def test_emulated_register_set_mips():
    # Registers without an explicit size take their width from ptrsize.
    regfile = PsuedoEmulatedRegisterFile(mips, 4)
    regfile.write_register("v0", 0xFFFF_AABB)
    _expect_reads(regfile, v0=0xFFFF_AABB)

    regfile = PsuedoEmulatedRegisterFile(mips, 8)
    regfile.write_register("v0", 0xFF_FFFF_AABB)
    _expect_reads(regfile, v0=0xFF_FFFF_AABB)