From 17979d4c1a439a1b41b4c265bd122ea3c6990b18 Mon Sep 17 00:00:00 2001 From: jxuanli <65455765+jxuanli@users.noreply.github.com> Date: Thu, 6 Nov 2025 07:16:43 -0800 Subject: [PATCH] Tracing kernel memory management (#3379) * added kmemtrace class * added ret trace handler * added lldb ret trace handler * making the output more colourful * added the actual command * storing output * temp suspend ctx output * tracing with mutex * add option to only trace relevant allocations and frees * cleaned up * renaming * docs * format * refactored + addressing comments --- docs/commands/index.md | 1 + docs/commands/kernel/kmem-trace.md | 24 +++ docs/contributing/dev-notes.md | 2 +- pwndbg/aglib/regs.py | 3 + pwndbg/arguments.py | 2 +- pwndbg/commands/__init__.py | 1 + pwndbg/commands/kmem_trace.py | 257 +++++++++++++++++++++++++++++ pwndbg/dbg/__init__.py | 20 +++ pwndbg/dbg/gdb/__init__.py | 31 ++++ pwndbg/dbg/lldb/__init__.py | 27 +++ pwndbg/dbg/lldb/hooks.py | 5 +- 11 files changed, 370 insertions(+), 3 deletions(-) create mode 100644 docs/commands/kernel/kmem-trace.md create mode 100644 pwndbg/commands/kmem_trace.py diff --git a/docs/commands/index.md b/docs/commands/index.md index e3f16e27f..02a6cbfb7 100644 --- a/docs/commands/index.md +++ b/docs/commands/index.md @@ -85,6 +85,7 @@ - [kdmesg](kernel/kdmesg.md) - Displays the kernel ring buffer (dmesg) contents. - [kfile](kernel/kfile.md) - Displays information about fds accessible by a kernel task. - [klookup](kernel/klookup.md) - Lookup kernel symbols +- [kmem-trace](kernel/kmem-trace.md) - Tracing kernel memory (SLUB and buddy) allocations and frees. - [kmod](kernel/kmod.md) - Displays the loaded Linux kernel modules. 
- [knft-dump](kernel/knft-dump.md) - Dump all nftables: tables, chains, rules, expressions - [knft-list-chains](kernel/knft-list-chains.md) - Dump netfilter chains form a specific table diff --git a/docs/commands/kernel/kmem-trace.md b/docs/commands/kernel/kmem-trace.md new file mode 100644 index 000000000..30834a5b7 --- /dev/null +++ b/docs/commands/kernel/kmem-trace.md @@ -0,0 +1,24 @@ + +# kmem-trace + +```text +usage: kmem-trace [-h] [-s] [-b] [-v] [-c COMMAND] [--all] + +``` + +Tracing kernel memory (SLUB and buddy) allocations and frees. + Unless --all is specified, only the allocations triggered by the current function until the function returns will be printed. + This option may be helpful if you also want to trace frees scheduled with rcu or if the traced command steps out of the current function. +### Optional arguments + +|Short|Long|Help| +| :--- | :--- | :--- | +|-h|--help|show this help message and exit| +|-s|--trace-slab|enable slab allocator tracing| +|-b|--trace-buddy|enable buddy allocator tracing| +|-v|--verbose|print backtraces| +|-c|--command|command to be traced (e.g. `n`, `nextret`) (default: 'n')| +||--all|display ALL memory allocations/frees regardless if they are triggered by the current function.| + + + diff --git a/docs/contributing/dev-notes.md b/docs/contributing/dev-notes.md index b59f1225f..d0da4436d 100644 --- a/docs/contributing/dev-notes.md +++ b/docs/contributing/dev-notes.md @@ -14,7 +14,7 @@ Feel free to update the list below! * We have our own `pwndbg.config.Parameter` - read about it in [Adding a Configuration Option](adding-a-parameter.md). -* The dashboard/display/context we are displaying is done by `pwndbg/commands/context.py` which is invoked through GDB's and LLDB's prompt hook, which are defined, respectively, in `pwndbg/gdblib/prompt.py` as `prompt_hook_on_stop`, and in `pwndb/dbg/lldb/hooks.py` as `prompt_hook`. 
+* The dashboard/display/context we are displaying is done by `pwndbg/commands/context.py` which is invoked through GDB's and LLDB's prompt hook, which are defined as `prompt_hook` in both `pwndbg/gdblib/prompt.py` and `pwndbg/dbg/lldb/hooks.py`. * We change a bit GDB settings - this can be seen in `pwndbg/dbg/gdb.py` under `GDB.setup` - there are also imports for all Pwndbg submodules. diff --git a/pwndbg/aglib/regs.py b/pwndbg/aglib/regs.py index d90c2b076..5f2803f7f 100644 --- a/pwndbg/aglib/regs.py +++ b/pwndbg/aglib/regs.py @@ -105,6 +105,9 @@ class module(ModuleType): @pwndbg.lib.cache.cache_until("stop", "prompt") def read_reg(self, reg: str, frame: pwndbg.dbg_mod.Frame | None = None) -> int | None: + return self.read_reg_uncached(reg, frame) + + def read_reg_uncached(self, reg: str, frame: pwndbg.dbg_mod.Frame | None = None) -> int | None: reg = reg.lstrip("$") try: value = get_register(reg, frame) diff --git a/pwndbg/arguments.py b/pwndbg/arguments.py index 0f0eb2b90..3930a7406 100644 --- a/pwndbg/arguments.py +++ b/pwndbg/arguments.py @@ -160,7 +160,7 @@ def argument(n: int, abi: pwndbg.lib.abi.ABI | None = None) -> int: regs = abi.register_arguments if n < len(regs): - return getattr(pwndbg.aglib.regs, regs[n]) + return pwndbg.aglib.regs.read_reg_uncached(regs[n]) n -= len(regs) diff --git a/pwndbg/commands/__init__.py b/pwndbg/commands/__init__.py index aa78fcca9..a0f7d0234 100644 --- a/pwndbg/commands/__init__.py +++ b/pwndbg/commands/__init__.py @@ -942,6 +942,7 @@ def load_commands() -> None: import pwndbg.commands.kdmabuf import pwndbg.commands.kdmesg import pwndbg.commands.klookup + import pwndbg.commands.kmem_trace import pwndbg.commands.kmod import pwndbg.commands.knft import pwndbg.commands.ksyscalls diff --git a/pwndbg/commands/kmem_trace.py b/pwndbg/commands/kmem_trace.py new file mode 100644 index 000000000..d6cefaa9c --- /dev/null +++ b/pwndbg/commands/kmem_trace.py @@ -0,0 +1,282 @@
+from __future__ import annotations
+
+import argparse
+import threading
+
+import pwndbg.aglib.regs
+import pwndbg.aglib.symbol
+import pwndbg.arguments
+import pwndbg.color as C
+import pwndbg.color.message as M
+import pwndbg.commands
+from pwndbg.dbg import BreakpointLocation
+
+parser = argparse.ArgumentParser(
+    description="""
+    Tracing kernel memory (SLUB and buddy) allocations and frees.
+    Unless --all is specified, only the allocations triggered by the current function until the function returns will be printed.
+    This option may be helpful if you also want to trace frees scheduled with rcu or if the traced command steps out of the current function.
+    """
+)
+parser.add_argument("-s", "--trace-slab", action="store_true", help="enable slab allocator tracing")
+parser.add_argument(
+    "-b", "--trace-buddy", action="store_true", help="enable buddy allocator tracing"
+)
+parser.add_argument("-v", "--verbose", action="store_true", help="print backtraces")
+parser.add_argument(
+    "-c", "--command", type=str, default="n", help="command to be traced (e.g. `n`, `nextret`)"
+)
+parser.add_argument(
+    "--all",
+    action="store_true",
+    help="display ALL memory allocations/frees regardless if they are triggered by the current function.",
+)
+
+
+class KmemTracepointsData:
+    """Collects the formatted output lines of one `kmem-trace` run."""
+
+    def __init__(self, verbose: bool, trace_all: bool):
+        """
+        `verbose` appends the backtrace to every recorded event.
+        `trace_all` disables filtering by `curr`, the symbol of the selected frame's parent.
+        """
+        self.results: list[str] = []
+        # order saved by the page allocation entry handler for its return handler
+        self.order: int | None = None
+        self.mutex = threading.RLock()
+        self.verbose = verbose
+        self.curr: str | None = None  # None means tracing all
+        if not trace_all:
+            self.curr = self._caller_symbol()
+            if self.curr is None:
+                # nothing to filter on, so fall back to tracing all
+                print(M.warn("Could not resolve the current function, tracing everything."))
+
+    @staticmethod
+    def _caller_symbol() -> str | None:
+        """Symbol name for the pc of the selected frame's parent, None if unavailable."""
+        frame = pwndbg.dbg.selected_frame()
+        parent = frame.parent() if frame is not None else None
+        if parent is None:
+            return None
+        # current frame only accounting for jumps
+        symbol = pwndbg.aglib.symbol.resolve_addr(parent.pc())
+        if not symbol:
+            return None
+        # drop the "+offset" suffix
+        return symbol.split("+")[0] or None
+
+    def add_result(self, result: str) -> None:
+        """Record `result` (plus the backtrace when verbose) unless it is filtered out."""
+        if not result:
+            return
+        with self.mutex:
+            bt = pwndbg.commands.context.context_backtrace(False)
+            # when filtering, keep only events whose backtrace mentions `curr`
+            if self.curr and not any(self.curr in line for line in bt):
+                return
+            self.results.append(result)
+            if self.verbose:
+                self.results += bt
+
+    def _format_kmem_tracepoint_output(self, prefix: str, name: str, kind: str, addr: int) -> str:
+        """Build one coloured, column-aligned output line."""
+        colour = C.red if "FREE" in prefix else C.green
+        prefix = colour(prefix.ljust(12, " "))
+        name = C.blue(name.ljust(16, " "))
+        kind = kind.ljust(4, " ")
+        return f"{prefix} {name} {kind} @ {C.blue(hex(addr))}"
+
+    def format_slab_kmem_tracepoint_output(self, is_free: bool, objaddr: int | None) -> None:
+        """Record a slab allocation or free of the object at `objaddr`."""
+        if not objaddr:
+            return
+        prefix = "[SLAB FREE]" if is_free else "[SLAB ALLOC]"
+        try:
+            cache = pwndbg.aglib.kernel.slab.find_containing_slab_cache(objaddr)
+            name = cache.name
+        except Exception:
+            # best effort: report the address instead of aborting the trace
+            self.add_result(M.warn(f"{prefix} invalid SLUB object @ {hex(objaddr)}"))
+            return
+        self.add_result(self._format_kmem_tracepoint_output(prefix, name, "obj", objaddr))
+
+    def format_page_kmem_tracepoint_output(
+        self, is_free: bool, page: int | None, order: int | None
+    ) -> None:
+        """Record a buddy allocation or free of `page` with the given `order`."""
+        if not page:
+            return
+        prefix = "[PAGE FREE]" if is_free else "[PAGE ALLOC]"
+        physmap = pwndbg.aglib.kernel.page_to_virt(page)
+        result = self._format_kmem_tracepoint_output(prefix, f"order-{order}", "page", page)
+        result += f" (physmap: {C.red(hex(physmap))})"
+        self.add_result(result)
+
+
+class KmemTracepoints:
+    """Installs and removes the breakpoints that feed a `KmemTracepointsData`."""
+
+    def __init__(self):
+        # try to capture the lowest possible level of exported functions in the (de)alloc chain
+        # for example __alloc_pages_bulk calls __alloc_pages and only __alloc_pages is included
+        # lists might not be complete
+        # try to resolve all names, if does not exist, means it is not exported for that version
+        kmalloc_names = (  # (tries to) include all slab alloc functions for all v5.x and v6.x
+            "__kmalloc",
+            "__kmalloc_node",
+            "__kmalloc_node_track_caller",
+            "__kmalloc_track_caller",
+            "__krealloc",
+            "kmalloc_order",
+            "kmalloc_order_trace",
+            "kmem_cache_alloc",
+            "kmem_cache_alloc_node",
+            "kmem_cache_alloc_node_trace",
+            "kmem_cache_alloc_trace",
+            "kmem_cache_alloc_lru",
+            "krealloc",
+            "kmalloc_node_trace",
+            "kmalloc_trace",
+            "__kmalloc_node_noprof",
+            "__kmalloc_noprof",
+            "kmalloc_node_trace_noprof",
+            "kmalloc_node_track_caller_noprof",
+            "kmalloc_trace_noprof",
+            "kmem_cache_alloc_lru_noprof",
+            "kmem_cache_alloc_node_noprof",
+            "kmem_cache_alloc_noprof",
+            "krealloc_noprof",
+            "__kmalloc_node_track_caller_noprof",
+            "__kmalloc_cache_node_noprof",
+            "__kmalloc_cache_noprof",
+        )
+        self.kallocs = self.resolve_names(kmalloc_names)
+        kfree_names = ("kfree",)
+        self.kfrees = self.resolve_names(kfree_names)
+        palloc_names = (  # all of those functions have the 2nd arg == order
+            "__alloc_frozen_pages_noprof",
+            "__alloc_pages",
+            "__alloc_pages_nodemask",
+            "alloc_pages_noprof",
+        )
+        self.pallocs = self.resolve_names(palloc_names)
+        pfree_names = (  # page *, order
+            "__free_pages",
+        )
+        self.pfrees = self.resolve_names(pfree_names)
+        self.sps = []
+        self.data = None
+        self.slab_tracepoints_enabled = True
+        self.buddy_tracepoints_enabled = True
+
+    def resolve_names(self, names) -> list[int]:
+        """Return the addresses of the symbols in `names` that could be looked up."""
+        addrs = (pwndbg.aglib.symbol.lookup_symbol_addr(name) for name in names)
+        return [addr for addr in addrs if addr is not None]
+
+    @staticmethod
+    def _kalloc_handler() -> bool:
+        """Return handler of a slab allocation: record the returned object."""
+        self = get_kmem_tracepoints()
+        objaddr = pwndbg.aglib.regs.read_reg_uncached(pwndbg.aglib.regs.retval)
+        self.data.format_slab_kmem_tracepoint_output(False, objaddr)
+        return False
+
+    @staticmethod
+    def kalloc_handler(sp: pwndbg.dbg_mod.StopPoint) -> bool:
+        """Entry handler of a slab allocation: defer recording to the function's return."""
+        pwndbg.dbg.selected_inferior().trace_ret(KmemTracepoints._kalloc_handler, True)
+        return False
+
+    @staticmethod
+    def kfree_handler(sp: pwndbg.dbg_mod.StopPoint) -> bool:
+        """Entry handler of a slab free: record the object passed as the first argument."""
+        self = get_kmem_tracepoints()
+        objaddr = pwndbg.arguments.argument(0)
+        self.data.format_slab_kmem_tracepoint_output(True, objaddr)
+        return False
+
+    @staticmethod
+    def _palloc_handler() -> bool:
+        """Return handler of a page allocation: record the returned page and the saved order."""
+        self = get_kmem_tracepoints()
+        page = pwndbg.aglib.regs.read_reg_uncached(pwndbg.aglib.regs.retval)
+        self.data.format_page_kmem_tracepoint_output(False, page, self.data.order)
+        return False
+
+    @staticmethod
+    def palloc_handler(sp: pwndbg.dbg_mod.StopPoint) -> bool:
+        """Entry handler of a page allocation: save the order and trace the return."""
+        self = get_kmem_tracepoints()
+        order = pwndbg.arguments.argument(1)
+        pwndbg.dbg.selected_inferior().trace_ret(KmemTracepoints._palloc_handler, True)
+        # NOTE(review): a single slot, presumably overwritten by nested allocations - confirm
+        self.data.order = order
+        return False
+
+    @staticmethod
+    def pfree_handler(sp: pwndbg.dbg_mod.StopPoint) -> bool:
+        """Entry handler of a page free: record the page and order arguments."""
+        self = get_kmem_tracepoints()
+        page = pwndbg.arguments.argument(0)
+        order = pwndbg.arguments.argument(1)
+        self.data.format_page_kmem_tracepoint_output(True, page, order)
+        return False
+
+    def register_breakpoints(self, verbose: bool, trace_all: bool) -> None:
+        """Start a new trace and break on every resolved function of the enabled allocators."""
+        inf = pwndbg.dbg.selected_inferior()
+        self.data = KmemTracepointsData(verbose, trace_all)
+        targets = []
+        if self.slab_tracepoints_enabled:
+            targets.append((self.kallocs, KmemTracepoints.kalloc_handler))
+            targets.append((self.kfrees, KmemTracepoints.kfree_handler))
+        if self.buddy_tracepoints_enabled:
+            targets.append((self.pallocs, KmemTracepoints.palloc_handler))
+            targets.append((self.pfrees, KmemTracepoints.pfree_handler))
+        for addrs, handler in targets:
+            for addr in addrs:
+                sp = inf.break_at(BreakpointLocation(addr), handler, internal=True)
+                self.sps.append(sp)
+
+    def remove_breakpoints(self) -> None:
+        """Remove every registered breakpoint and re-enable both allocator tracers."""
+        for sp in self.sps:
+            sp.remove()
+        self.sps = []
+        self.slab_tracepoints_enabled = True
+        self.buddy_tracepoints_enabled = True
+
+
+@pwndbg.lib.cache.cache_until("objfile")
+def get_kmem_tracepoints():
+    """Return the `KmemTracepoints` instance (cached by the decorator above)."""
+    return KmemTracepoints()
+
+
+@pwndbg.commands.Command(parser, category=pwndbg.commands.CommandCategory.KERNEL)
+@pwndbg.commands.OnlyWhenQemuKernel
+@pwndbg.commands.OnlyWithKernelDebugSymbols
+@pwndbg.commands.OnlyWhenPagingEnabled
+def kmem_trace(trace_slab: bool, trace_buddy: bool, verbose: bool, command: str, all: bool):
+    tps = get_kmem_tracepoints()
+    if not trace_slab and not trace_buddy:
+        trace_slab = trace_buddy = True
+    tps.slab_tracepoints_enabled = trace_slab
+    tps.buddy_tracepoints_enabled = trace_buddy
+    old_val = pwndbg.config.context_backtrace_lines.value
+    try:
+        tps.register_breakpoints(verbose, all)
+        print(M.success("Finished registering tracepoints."))
+        pwndbg.config.context_backtrace_lines.value = 1000  # enable full backtrace
+        pwndbg.dbg.selected_inferior().runcmd(command)
+    finally:
+        # undo the temporary state even if registration or the traced command fails
+        pwndbg.config.context_backtrace_lines.value = old_val  # restore
+        tps.remove_breakpoints()
+    pwndbg.commands.context.context()
+    print("\n".join(tps.data.results))
+    # called for its side effect of skipping the next automatic context output
+    pwndbg.dbg.ctx_suspend_once()
diff --git a/pwndbg/dbg/__init__.py b/pwndbg/dbg/__init__.py index 32df33c40..6f725f64e 100644 --- a/pwndbg/dbg/__init__.py +++ b/pwndbg/dbg/__init__.py @@ -562,6 +562,13 @@ class Process: """ raise NotImplementedError() + def trace_ret(self, stop_handler: Callable[[], bool] | None = None, internal: bool = False): + """ + Traces/break_at the current frame's return address. + `stop_handler` and `internal` have the same semantic meaning as they are in `break_at` + """ + raise NotImplementedError() + # This is a fairly lazy solution. 
We would ideally support a more robust way # to query for ABIs, but Pwndbg currely only uses `show osabi` in GDB to # check for whether the target is running under Linux, so we only implement @@ -638,6 +645,12 @@ class Process: """ raise NotImplementedError() + def runcmd(self, cmd): + """ + Runs a debugger command + """ + raise NotImplementedError() + class TypeCode(Enum): """ @@ -1162,6 +1175,13 @@ class Debugger: finally: self.resume_events(ty) + @contextlib.contextmanager + def ctx_suspend_once(self): + """ + Avoid printing ctx once + """ + raise NotImplementedError() + def suspend_events(self, ty: EventType) -> None: """ Suspend delivery of all events of the given type until it is resumed diff --git a/pwndbg/dbg/gdb/__init__.py b/pwndbg/dbg/gdb/__init__.py index df3460901..e67036976 100644 --- a/pwndbg/dbg/gdb/__init__.py +++ b/pwndbg/dbg/gdb/__init__.py @@ -287,6 +287,19 @@ class BreakpointAdapter(gdb.Breakpoint): return self.stop_handler() +class FinishpointAdapter(gdb.FinishBreakpoint): + stop_handler: Callable[[], bool] + + def __init__(self, stop_handler, internal): + super().__init__(gdb.newest_frame(), internal) + self.stop_handler = stop_handler + + @override + def stop(self) -> bool: + result = self.stop_handler() + return result + + class GDBStopPoint(pwndbg.dbg_mod.StopPoint): inner: gdb.Breakpoint proc: GDBProcess @@ -834,6 +847,15 @@ class GDBProcess(pwndbg.dbg_mod.Process): return sp + @override + def trace_ret(self, stop_handler: Callable[[], bool] | None = None, internal: bool = False): + if stop_handler is None: + + def stop_handler(): + return True + + FinishpointAdapter(stop_handler, internal) + @override def is_linux(self) -> bool: # Detect current ABI of client side by 'show osabi' @@ -983,6 +1005,10 @@ class GDBProcess(pwndbg.dbg_mod.Process): return gdb.execute(f"add-symbol-file {path} {base}") + @override + def runcmd(self, cmd) -> str: + return gdb.execute(cmd, to_string=True) + class 
GDBExecutionController(pwndbg.dbg_mod.ExecutionController): @override @@ -1641,6 +1667,11 @@ class GDB(pwndbg.dbg_mod.Debugger): elif ty == pwndbg.dbg_mod.EventType.SUSPEND_ALL: raise RuntimeError("invalid usage, this event is not supported") + @override + @contextmanager + def ctx_suspend_once(self): + pwndbg.gdblib.prompt.context_shown = True + @override def suspend_events(self, ty: pwndbg.dbg_mod.EventType) -> None: pwndbg.gdblib.events.pause(_gdb_event_class_from_event_type(ty)) diff --git a/pwndbg/dbg/lldb/__init__.py b/pwndbg/dbg/lldb/__init__.py index eb94960eb..5ca3b26d2 100644 --- a/pwndbg/dbg/lldb/__init__.py +++ b/pwndbg/dbg/lldb/__init__.py @@ -1710,6 +1710,20 @@ class LLDBProcess(pwndbg.dbg_mod.Process): return sp + @override + def trace_ret(self, stop_handler: Callable[[], bool] | None = None, internal: bool = False): + if stop_handler is None: + + def stop_handler(): + return True + + def new_stop_handler(sp: pwndbg.dbg_mod.StopPoint) -> bool: + return stop_handler() + + retaddr = pwndbg.dbg.selected_frame().parent().pc() + bp = pwndbg.dbg_mod.BreakpointLocation(retaddr) + self.break_at(bp, new_stop_handler, internal) + @override def disasm(self, address: int) -> pwndbg.dbg_mod.DisassembledInstruction | None: instructions = self.target.ReadInstructions(lldb.SBAddress(address, self.target), 1) @@ -1816,6 +1830,10 @@ class LLDBProcess(pwndbg.dbg_mod.Process): # Queue the coroutine up for execution by the Pwndbg CLI. self.dbg.controllers.append((self, procedure(EXECUTION_CONTROLLER))) + @override + def runcmd(self, cmd) -> str: + return self.dbg._execute_lldb_command(cmd) + class LLDBCommand(pwndbg.dbg_mod.CommandHandle): def __init__(self, handler_name: str, command_name: str): @@ -1848,6 +1866,9 @@ class LLDB(pwndbg.dbg_mod.Debugger): # Relay used for exceptions originating from commands called through LLDB. 
_exception_relay: BaseException | None + # temporarily suspend context output + should_suspend_ctx: bool + @override def setup(self, *args, **kwargs): import pwnlib.update @@ -1859,6 +1880,7 @@ class LLDB(pwndbg.dbg_mod.Debugger): self.controllers = [] self._current_process_is_gdb_remote = False self._exception_relay = None + self.should_suspend_ctx = False import pwndbg @@ -2101,6 +2123,11 @@ class LLDB(pwndbg.dbg_mod.Debugger): return decorator + @override + @contextmanager + def ctx_suspend_once(self): + self.should_suspend_ctx = True + @override def suspend_events(self, ty: pwndbg.dbg_mod.EventType) -> None: self.suspended_events[ty] = True diff --git a/pwndbg/dbg/lldb/hooks.py b/pwndbg/dbg/lldb/hooks.py index 40e9b3aa9..a7ad61cbd 100644 --- a/pwndbg/dbg/lldb/hooks.py +++ b/pwndbg/dbg/lldb/hooks.py @@ -85,10 +85,13 @@ def prompt_hook(): # Clear the prompt cache manually. pwndbg.lib.cache.clear_cache("prompt") + dbg: LLDB = pwndbg.dbg + ctx_suspend_once = dbg.should_suspend_ctx global should_show_context - if should_show_context: + if should_show_context and not ctx_suspend_once: pwndbg.commands.context.context() should_show_context = False + dbg.should_suspend_ctx = False # Install the prompt hook.