From 971c93a1bf9fa9a7eea544d72b879cf487b9f197 Mon Sep 17 00:00:00 2001 From: "Matt." Date: Thu, 15 Aug 2024 13:11:39 -0300 Subject: [PATCH] Expand Debugger-agnostic API in preparation for initial command ports (#2352) * Expand Debugger-agnostic API to include vmmap, symbol and architecture functionality * Fix unit tests * Update pwndbg/dbg/lldb.py * Fix nits --------- Co-authored-by: Disconnect3d --- pwndbg/dbg/__init__.py | 76 ++++++++++++++++++++ pwndbg/dbg/gdb.py | 91 +++++++++++++++++++++--- pwndbg/dbg/lldb/__init__.py | 138 ++++++++++++++++++++++++++++++++++++ pwndbg/lib/memory.py | 7 +- 4 files changed, 299 insertions(+), 13 deletions(-) diff --git a/pwndbg/dbg/__init__.py b/pwndbg/dbg/__init__.py index 942ef6117..107df5f93 100644 --- a/pwndbg/dbg/__init__.py +++ b/pwndbg/dbg/__init__.py @@ -8,9 +8,13 @@ from enum import Enum from typing import Any from typing import Callable from typing import List +from typing import Literal +from typing import Sequence from typing import Tuple from typing import TypeVar +import pwndbg.lib.memory + dbg: Debugger = None T = TypeVar("T") @@ -20,6 +24,33 @@ class Error(Exception): pass +class Arch: + """ + The definition of an architecture. + """ + + @property + def endian(self) -> Literal["little", "big"]: + """ + Whether code in this module is little or big endian. + """ + raise NotImplementedError() + + @property + def name(self) -> str: + """ + Name of the architecture. + """ + raise NotImplementedError() + + @property + def ptrsize(self) -> int: + """ + Length of the pointer in this module. + """ + raise NotImplementedError() + + class Registers: """ A handle to the register values in a frame. @@ -55,6 +86,30 @@ class Thread: raise NotImplementedError() +class MemoryMap: + """ + A wrapper around a sequence of memory ranges + """ + + def is_qemu(self) -> bool: + """ + Returns whether this memory map was generated from a QEMU target.
+ """ + raise NotImplementedError() + + def has_reliable_perms(self) -> bool: + """ + Returns whether the permissions in this memory map are reliable. + """ + raise NotImplementedError() + + def ranges(self) -> Sequence[pwndbg.lib.memory.Page]: + """ + Returns all ranges in this memory map. + """ + raise NotImplementedError() + + class Process: def threads(self) -> List[Thread]: """ @@ -69,6 +124,27 @@ class Process: """ raise NotImplementedError() + def vmmap(self) -> MemoryMap: + """ + Returns the virtual memory map of this process. + """ + raise NotImplementedError() + + # We'll likely have to expand this into a Symbol class and change this to a + # `symbol_at_address` function later on. + def symbol_name_at_address(self, address: int) -> str | None: + """ + Returns the name of the symbol at the given address in the program, if + one exists. + """ + raise NotImplementedError() + + def arch(self) -> Arch: + """ + The default architecture of this process. + """ + raise NotImplementedError() + class TypeCode(Enum): """ diff --git a/pwndbg/dbg/gdb.py b/pwndbg/dbg/gdb.py index 0f36207a5..906e26c1e 100644 --- a/pwndbg/dbg/gdb.py +++ b/pwndbg/dbg/gdb.py @@ -4,6 +4,8 @@ import contextlib import signal from typing import Any from typing import List +from typing import Literal +from typing import Sequence from typing import Tuple from typing import TypeVar @@ -21,19 +23,27 @@ from pwndbg.gdblib import load_gdblib T = TypeVar("T") -class GDBRegisters(pwndbg.dbg_mod.Registers): - def __init__(self, frame: GDBFrame): - self.frame = frame +class GDBLibArch(pwndbg.dbg_mod.Arch): + @override + @property + def endian(self) -> Literal["little", "big"]: + import pwndbg.gdblib.arch + + return pwndbg.gdblib.arch.endian @override - def by_name(self, name: str) -> pwndbg.dbg_mod.Value | None: - try: - return GDBValue(self.frame.inner.read_register(name)) - except (gdb.error, ValueError): - # GDB throws an exception if the name is unknown, we just return - # None when that is the case. 
- pass - return None + @property + def name(self) -> str: + import pwndbg.gdblib.arch + + return pwndbg.gdblib.arch.name + + @override + @property + def ptrsize(self) -> int: + import pwndbg.gdblib.arch + + return pwndbg.gdblib.arch.ptrsize def parse_and_eval(expression: str, global_context: bool) -> gdb.Value: @@ -81,6 +91,21 @@ def selection(target: T, get_current: Callable[[], T], select: Callable[[T], Non select(current) +class GDBRegisters(pwndbg.dbg_mod.Registers): + def __init__(self, frame: GDBFrame): + self.frame = frame + + @override + def by_name(self, name: str) -> pwndbg.dbg_mod.Value | None: + try: + return GDBValue(self.frame.inner.read_register(name)) + except (gdb.error, ValueError): + # GDB throws an exception if the name is unknown, we just return + # None when that is the case. + pass + return None + + class GDBFrame(pwndbg.dbg_mod.Frame): def __init__(self, inner: gdb.Frame): self.inner = inner @@ -111,6 +136,25 @@ class GDBThread(pwndbg.dbg_mod.Thread): return GDBFrame(value) +class GDBMemoryMap(pwndbg.dbg_mod.MemoryMap): + def __init__(self, reliable_perms: bool, qemu: bool, pages: Sequence[pwndbg.lib.memory.Page]): + self.reliable_perms = reliable_perms + self.qemu = qemu + self.pages = pages + + @override + def is_qemu(self) -> bool: + return self.qemu + + @override + def has_reliable_perms(self) -> bool: + return self.reliable_perms + + @override + def ranges(self) -> Sequence[pwndbg.lib.memory.Page]: + return self.pages + + class GDBProcess(pwndbg.dbg_mod.Process): def __init__(self, inner: gdb.Inferior): self.inner = inner @@ -122,6 +166,31 @@ class GDBProcess(pwndbg.dbg_mod.Process): except gdb.error as e: raise pwndbg.dbg_mod.Error(e) + @override + def vmmap(self) -> pwndbg.dbg_mod.MemoryMap: + import pwndbg.gdblib.vmmap + from pwndbg.gdblib import gdb_version + + pages = pwndbg.gdblib.vmmap.get() + qemu = pwndbg.gdblib.qemu.is_qemu() and not pwndbg.gdblib.qemu.exec_file_supported() + + # Only GDB versions >=12 report permission info 
in info proc mappings. + # On older versions, we fallback on "rwx". + # See https://github.com/bminor/binutils-gdb/commit/29ef4c0699e1b46d41ade00ae07a54f979ea21cc + reliable_perms = not (pwndbg.gdblib.qemu.is_qemu_usermode() and gdb_version[0] < 12) + + return GDBMemoryMap(reliable_perms, qemu, pages) + + @override + def symbol_name_at_address(self, address: int) -> str | None: + import pwndbg.gdblib.symbol + + return pwndbg.gdblib.symbol.get(address) or None + + @override + def arch(self) -> pwndbg.dbg_mod.Arch: + return GDBLibArch() + class GDBCommand(gdb.Command): def __init__( diff --git a/pwndbg/dbg/lldb/__init__.py b/pwndbg/dbg/lldb/__init__.py index d17f685ac..896b974e5 100644 --- a/pwndbg/dbg/lldb/__init__.py +++ b/pwndbg/dbg/lldb/__init__.py @@ -1,9 +1,11 @@ from __future__ import annotations +import os import sys from typing import Any from typing import Callable from typing import List +from typing import Literal from typing import Tuple import lldb @@ -12,6 +14,28 @@ from typing_extensions import override import pwndbg +class LLDBArch(pwndbg.dbg_mod.Arch): + def __init__(self, name: str, ptrsize: int, endian: Literal["little", "big"]): + self._endian = endian + self._name = name + self._ptrsize = ptrsize + + @override + @property + def endian(self) -> Literal["little", "big"]: + return self._endian + + @override + @property + def name(self) -> str: + return self._name + + @override + @property + def ptrsize(self) -> int: + return self._ptrsize + + class LLDBFrame(pwndbg.dbg_mod.Frame): def __init__(self, inner: lldb.SBFrame): self.inner = inner @@ -206,6 +230,24 @@ class LLDBValue(pwndbg.dbg_mod.Value): return LLDBValue(self.inner.Cast(t.inner)) +class LLDBMemoryMap(pwndbg.dbg_mod.MemoryMap): + def __init__(self, pages: List[pwndbg.lib.memory.Page]): + self.pages = pages + + @override + def is_qemu(self) -> bool: + # TODO/FIXME: Figure a way to detect QEMU later. 
+ return False + + @override + def has_reliable_perms(self) -> bool: + return True + + @override + def ranges(self) -> List[pwndbg.lib.memory.Page]: + return self.pages + + class LLDBProcess(pwndbg.dbg_mod.Process): def __init__(self, process: lldb.SBProcess, target: lldb.SBTarget): self.process = process @@ -221,6 +263,102 @@ class LLDBProcess(pwndbg.dbg_mod.Process): return LLDBValue(value) + @override + def vmmap(self) -> pwndbg.dbg_mod.MemoryMap: + regions = self.process.GetMemoryRegions() + + pages = [] + for i in range(regions.GetSize()): + region = lldb.SBMemoryRegionInfo() + assert regions.GetMemoryRegionAtIndex( + i, region + ), "invalid region despite being in bounds" + + objfile = region.GetName() + if objfile is None: + # LLDB will sometimes give us overlapping ranges with no name. + # For now, we ignore them, since GDB does not show them. + continue + + perms = 0 + if region.IsReadable(): + perms |= os.R_OK + if region.IsWritable(): + perms |= os.W_OK + if region.IsExecutable(): + perms |= os.X_OK + + # LLDB doesn't actually tell us the offset of the mapped file, just + # whether it is mapped or not. + offset = 0 + + pages.append( + pwndbg.lib.memory.Page( + start=region.GetRegionBase(), + size=region.GetRegionEnd() - region.GetRegionBase(), + flags=perms, + offset=offset, + objfile=objfile, + ) + ) + + return LLDBMemoryMap(pages) + + @override + def symbol_name_at_address(self, address: int) -> str | None: + addr = lldb.SBAddress(address, self.target) + ctx = self.target.ResolveSymbolContextForAddress(addr, lldb.eSymbolContextSymbol) + + if not ctx.IsValid() or not ctx.symbol.IsValid(): + return None + + return ctx.symbol.name + + @override + def arch(self) -> pwndbg.dbg_mod.Arch: + endian0 = self.process.GetByteOrder() + endian1 = self.target.GetByteOrder() + + # Sometimes - particularly when using `gdb-remote` - the process might not have had + # its architecture, and thus its byte order, properly resolved. 
This happens often + # around architectures like MIPS. In those cases, we might have some luck falling + # back to the architecture information in the target, that might've been manually + # set by the user, or properly detected during target creation. + if endian0 == lldb.eByteOrderInvalid: + endian0 = endian1 + + if endian0 != endian1: + raise RuntimeError( + "SBTarget::GetByteOrder() != SBProcess::GetByteOrder(). We don't know how to handle that" + ) + if endian0 != lldb.eByteOrderLittle and endian0 != lldb.eByteOrderBig: + raise RuntimeError("We only support little and big endian systems") + if endian0 == lldb.eByteOrderInvalid: + raise RuntimeError("Byte order is invalid") + + endian: Literal["little", "big"] = "little" if endian0 == lldb.eByteOrderLittle else "big" + + ptrsize0 = self.process.GetAddressByteSize() + ptrsize1 = self.target.GetAddressByteSize() + if ptrsize0 != ptrsize1: + raise RuntimeError( + "SBTarget::GetAddressByteSize() != SBProcess::GetAddressByteSize(). We don't know how to handle that" + ) + + names = self.target.GetTriple().split("-") + if len(names) == 0 or len(names[0]) == 0: + # This is a scary situation to be in. LLDB lets users attach to + # processes even when it has no idea what the target is. In those + # cases, the target triple name will be missing, and pretty much + # every other piece of information coming from LLDB will be + # unreliable. + # + # We should have to handle ourselves gracefully here, but there's + # basically nothing we can do to help with this, so we error out.
+ raise pwndbg.dbg_mod.Error("Unknown target architecture") + + return LLDBArch(names[0], ptrsize0, endian) + class LLDBCommand(pwndbg.dbg_mod.CommandHandle): def __init__(self, handler_name: str, command_name: str): diff --git a/pwndbg/lib/memory.py b/pwndbg/lib/memory.py index c39fe7d3b..2079d8142 100644 --- a/pwndbg/lib/memory.py +++ b/pwndbg/lib/memory.py @@ -6,8 +6,6 @@ from __future__ import annotations import os -import pwndbg.gdblib.arch - PAGE_SIZE = 0x1000 PAGE_MASK = ~(PAGE_SIZE - 1) @@ -134,6 +132,11 @@ class Page: ) def __str__(self) -> str: + # This module requires GDB, so it causes import failures in unit tests. + # This will stop being a problem as soon as this module gets ported to + # aglib.arch, but, for now we have to add this as a quick stopgap. + import pwndbg.gdblib.arch + return f"{self.vaddr:#{2 + 2 * pwndbg.gdblib.arch.ptrsize}x} {self.vaddr + self.memsz:#{2 + 2 * pwndbg.gdblib.arch.ptrsize}x} {self.permstr} {self.memsz:8x} {self.offset:6x} {self.objfile or ''}" def __repr__(self) -> str: