From 2f03a901e7fa8376d513473bcf2a5acbc8928317 Mon Sep 17 00:00:00 2001 From: Disconnect3d Date: Wed, 3 May 2023 00:24:17 +0200 Subject: [PATCH] Optimize vis_heap_chunks command (#1678) * Optimize vis_heap_chunks command This commit optimizes the vis_heap_chunks command by: 1) precalculating bin labels instead of computing them on demand for each chunk 2) calling pwndbg.gdblib.memory.read once instead of twice in a hot loop For the `vis 2000` command run while debugging a python3 shell, the first change cut the execution time from almost 20s to 5s. Another benchmark, with both changes 1) and 2) applied, showed a 19.28s->4.14s timing. The benchmark is included in this commit in profiling/benchmark_vis_heap_chunks/ so that it can be reproduced, e.g. to optimize the function further or to reproduce my results. --- profiling/benchmark_vis_heap_chunks/README.md | 2 ++ profiling/benchmark_vis_heap_chunks/bench.sh | 2 ++ .../benchmark_vis_heap_chunks/gdbscript.py | 9 ++++++ pwndbg/commands/heap.py | 32 ++++++++++++------- 4 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 profiling/benchmark_vis_heap_chunks/README.md create mode 100755 profiling/benchmark_vis_heap_chunks/bench.sh create mode 100644 profiling/benchmark_vis_heap_chunks/gdbscript.py diff --git a/profiling/benchmark_vis_heap_chunks/README.md b/profiling/benchmark_vis_heap_chunks/README.md new file mode 100644 index 000000000..f57d56b07 --- /dev/null +++ b/profiling/benchmark_vis_heap_chunks/README.md @@ -0,0 +1,2 @@ +This benchmark was used to investigate performance problems with the `vis_heap_chunks` command described in https://github.com/pwndbg/pwndbg/issues/1675 + diff --git a/profiling/benchmark_vis_heap_chunks/bench.sh b/profiling/benchmark_vis_heap_chunks/bench.sh new file mode 100755 index 000000000..dbcbd0df5 --- /dev/null +++ b/profiling/benchmark_vis_heap_chunks/bench.sh @@ -0,0 +1,2 @@ +#!/bin/sh +gdb --batch --ex 'break exit' --ex 'run' --ex 'source gdbscript.py' --args $(which python3) 
-c 'import sys; sys.exit(0)' diff --git a/profiling/benchmark_vis_heap_chunks/gdbscript.py b/profiling/benchmark_vis_heap_chunks/gdbscript.py new file mode 100644 index 000000000..db7cb49a8 --- /dev/null +++ b/profiling/benchmark_vis_heap_chunks/gdbscript.py @@ -0,0 +1,9 @@ +import gdb, pwndbg + +pwndbg.profiling.profiler.start() +result = gdb.execute("vis 2000", to_string=True) +pwndbg.profiling.profiler.stop('profile.prof') + +# Save result in case user wants to inspect it +with open("result", "w") as f: + f.write(result) diff --git a/pwndbg/commands/heap.py b/pwndbg/commands/heap.py index b35c27cdf..1e2c55212 100644 --- a/pwndbg/commands/heap.py +++ b/pwndbg/commands/heap.py @@ -1,5 +1,7 @@ import argparse import ctypes +from typing import Dict +from typing import List import gdb from tabulate import tabulate @@ -914,6 +916,8 @@ def vis_heap_chunks( >> 1 ) + bin_labels_map: Dict[int, List[str]] = bin_labels_mapping(bin_collections) + for c, stop in enumerate(chunk_delims): color_func = color_funcs[c % len(color_funcs)] @@ -940,17 +944,18 @@ def vis_heap_chunks( if printed % 2 == 0: out += "\n0x%x" % cursor - cell = pwndbg.gdblib.arch.unpack(pwndbg.gdblib.memory.read(cursor, ptr_size)) + data = pwndbg.gdblib.memory.read(cursor, ptr_size) + cell = pwndbg.gdblib.arch.unpack(data) cell_hex = "\t0x{:0{n}x}".format(cell, n=ptr_size * 2) out += color_func(cell_hex) printed += 1 - labels.extend(bin_labels(cursor, bin_collections)) + labels.extend(bin_labels_map.get(cursor, [])) if cursor == arena.top: labels.append("Top chunk") - asc += bin_ascii(pwndbg.gdblib.memory.read(cursor, ptr_size)) + asc += bin_ascii(data) if printed % 2 == 0: out += "\t" + color_func(asc) + ("\t <-- " + ", ".join(labels) if labels else "") asc = "" @@ -975,8 +980,14 @@ def bin_ascii(bs): return "".join(chr(c) if c in valid_chars else "." 
for c in bs) -def bin_labels(addr, collections): - labels = [] +def bin_labels_mapping(collections): + """ + Returns all potential bin labels for all potential addresses + We precompute all of them because doing this on demand was too slow and inefficient + See #1675 for more details + """ + labels_mapping: Dict[int, List[str]] = {} + for bins in collections: if not bins: continue @@ -989,14 +1000,13 @@ def bin_labels(addr, collections): count = "/{:d}".format(b.count) if bins_type == BinType.TCACHE else None chunks = b.fd_chain for chunk_addr in chunks: - if addr == chunk_addr: - labels.append( - "{:s}[{:s}][{:d}{}]".format( - bins_type, size, chunks.index(addr), count or "" - ) + labels_mapping.setdefault(chunk_addr, []).append( + "{:s}[{:s}][{:d}{}]".format( + bins_type, size, chunks.index(chunk_addr), count or "" ) + ) - return labels + return labels_mapping try_free_parser = argparse.ArgumentParser(