From feeb7133965bf39a9c3f009c8caabe3a84a24688 Mon Sep 17 00:00:00 2001 From: patryk4815 Date: Sun, 10 Nov 2024 23:32:23 +0100 Subject: [PATCH] Fix jemalloc #2502 (#2512) * add nix fmt * fix capstone=5.0.3 on darwin * nix fmt * nix fmt * port jemalloc to aglib * fix aglib gdb value by index * fix jemalloc typing * fix jemalloc typing * jemalloc improve times --- flake.nix | 1 + nix/bundle/pkg.nix | 3 +- nix/devshell.nix | 45 +++++++----- nix/pwndbg.nix | 132 ++++++++++++++++++++-------------- nix/pyenv.nix | 34 ++++++--- pwndbg/aglib/heap/jemalloc.py | 95 ++++++++++++------------ pwndbg/aglib/memory.py | 10 +++ pwndbg/dbg/gdb.py | 2 +- 8 files changed, 191 insertions(+), 131 deletions(-) diff --git a/flake.nix b/flake.nix index b00197ae9..c9315c96b 100644 --- a/flake.nix +++ b/flake.nix @@ -93,5 +93,6 @@ isLLDB = true; } ); + formatter = forAllSystems (system: pkgsBySystem.${system}.nixfmt-rfc-style); }; } diff --git a/nix/bundle/pkg.nix b/nix/bundle/pkg.nix index ae5530b55..26783794d 100644 --- a/nix/bundle/pkg.nix +++ b/nix/bundle/pkg.nix @@ -16,7 +16,8 @@ let { drv ? null, config ? "nfpm.yaml", - packager ? null, # apk|deb|rpm|archlinux + packager ? null, + # apk|deb|rpm|archlinux preremove ? null, ... }@attrs: diff --git a/nix/devshell.nix b/nix/devshell.nix index dd3b555e2..793a92399 100644 --- a/nix/devshell.nix +++ b/nix/devshell.nix @@ -1,7 +1,6 @@ # This should be kept in sync with setup-dev.sh and lint.sh requirements { - pkgs ? - # If pkgs is not defined, instantiate nixpkgs from locked commit + pkgs ? # If pkgs is not defined, instantiate nixpkgs from locked commit let lock = (builtins.fromJSON (builtins.readFile ./flake.lock)).nodes.nixpkgs.locked; nixpkgs = fetchTarball { @@ -17,7 +16,12 @@ }: let pyEnv = import ./pyenv.nix { - inherit pkgs python3 inputs isLLDB; + inherit + pkgs + python3 + inputs + isLLDB + ; lib = pkgs.lib; isDev = true; }; @@ -26,22 +30,27 @@ in default = pkgs.mkShell { NIX_CONFIG = "extra-experimental-features = nix-command flakes repl-flake"; # Anything not handled by the poetry env - nativeBuildInputs = (with pkgs; [ - # from setup-dev.sh - nasm - gcc - curl - gdb - parallel - qemu - netcat-openbsd - zig_0_10 # matches setup-dev.sh - go + nativeBuildInputs = + (with pkgs; [ + # from setup-dev.sh + nasm + gcc + curl + gdb + parallel + qemu + netcat-openbsd + zig_0_10 # matches setup-dev.sh + go - pyEnv - ]) ++ pkgs.lib.optionals isLLDB (with pkgs; [ - lldb_19 - ]); + pyEnv + ]) + ++ pkgs.lib.optionals isLLDB ( + with pkgs; + [ + lldb_19 + ] + ); shellHook = '' export PWNDBG_VENV_PATH="PWNDBG_PLEASE_SKIP_VENV" export ZIGPATH="${pkgs.lib.getBin pkgs.zig_0_10}/bin/" diff --git a/nix/pwndbg.nix b/nix/pwndbg.nix index 73cc26192..2fe0f1e16 100644 --- a/nix/pwndbg.nix +++ b/nix/pwndbg.nix @@ -27,7 +27,8 @@ let python3 inputs isDev - isLLDB; + isLLDB + ; lib = pkgs.lib; }; @@ -40,66 +41,93 @@ let '' ); - pwndbg = let - pwndbgName = if isLLDB then "pwndbg-lldb" else "pwndbg"; - in pkgs.stdenv.mkDerivation { - name = pwndbgName; - version = pwndbgVersion; + pwndbg = + let + pwndbgName = if isLLDB then "pwndbg-lldb" else "pwndbg"; + in + pkgs.stdenv.mkDerivation { + name = pwndbgName; + version = pwndbgVersion; - src = pkgs.lib.sourceByRegex inputs.pwndbg ([ - "pwndbg" - "pwndbg/.*" - ] ++ (if isLLDB then [ - "lldbinit.py" - "pwndbg-lldb.py" - ] else [ - "gdbinit.py" - ])); + src = pkgs.lib.sourceByRegex inputs.pwndbg ( + [ + "pwndbg" + "pwndbg/.*" + ] + ++ ( + if isLLDB then + [ + "lldbinit.py" + "pwndbg-lldb.py" + ] + else + [ + "gdbinit.py" + ] + ) + ); - nativeBuildInputs = [ pkgs.makeWrapper ]; - buildInputs = [ pyEnv ]; + nativeBuildInputs = [ pkgs.makeWrapper ]; + buildInputs = [ pyEnv ]; - installPhase = let - fix_init_script = { target, line }: '' - # Build self-contained init script for lazy loading from vanilla gdb - # I purposely use insert() so I can re-import during development without having to restart gdb - sed "${line} i import sys, os\n\ - sys.path.insert(0, '${pyEnv}/${pyEnv.sitePackages}')\n\ - sys.path.insert(0, '$out/share/pwndbg/')\n\ - os.environ['PATH'] += ':${binPath}'\n" -i ${target} - ''; - in (if isLLDB then '' - mkdir -p $out/share/pwndbg - mkdir -p $out/bin + installPhase = + let + fix_init_script = + { target, line }: + '' + # Build self-contained init script for lazy loading from vanilla gdb + # I purposely use insert() so I can re-import during development without having to restart gdb + sed "${line} i import sys, os\n\ + sys.path.insert(0, '${pyEnv}/${pyEnv.sitePackages}')\n\ + sys.path.insert(0, '$out/share/pwndbg/')\n\ + os.environ['PATH'] += ':${binPath}'\n" -i ${target} + ''; + in + ( + if isLLDB then + '' + mkdir -p $out/share/pwndbg + mkdir -p $out/bin - cp -r lldbinit.py pwndbg $out/share/pwndbg - cp pwndbg-lldb.py $out/bin/${pwndbgName} + cp -r lldbinit.py pwndbg $out/share/pwndbg + cp pwndbg-lldb.py $out/bin/${pwndbgName} - ${fix_init_script { target = "$out/bin/${pwndbgName}"; line = "4"; } } + ${fix_init_script { + target = "$out/bin/${pwndbgName}"; + line = "4"; + }} - touch $out/share/pwndbg/.skip-venv - wrapProgram $out/bin/${pwndbgName} \ - --prefix PATH : ${ pkgs.lib.makeBinPath [ lldb ] } \ - '' + (pkgs.lib.optionalString (!pkgs.stdenv.isDarwin) '' - --set LLDB_DEBUGSERVER_PATH ${ pkgs.lib.makeBinPath [ lldb ] }/lldb-server \ - '') + '' - --set PWNDBG_LLDBINIT_DIR $out/share/pwndbg - '' else '' - mkdir -p $out/share/pwndbg + touch $out/share/pwndbg/.skip-venv + wrapProgram $out/bin/${pwndbgName} \ + --prefix PATH : ${pkgs.lib.makeBinPath [ lldb ]} \ + '' + + (pkgs.lib.optionalString (!pkgs.stdenv.isDarwin) '' + --set LLDB_DEBUGSERVER_PATH ${pkgs.lib.makeBinPath [ lldb ]}/lldb-server \ + '') + + '' + --set PWNDBG_LLDBINIT_DIR $out/share/pwndbg + '' + else + '' + mkdir -p $out/share/pwndbg - cp -r gdbinit.py pwndbg $out/share/pwndbg - ${fix_init_script { target = "$out/share/pwndbg/gdbinit.py"; line = "2"; } } + cp -r gdbinit.py pwndbg $out/share/pwndbg + ${fix_init_script { + target = "$out/share/pwndbg/gdbinit.py"; + line = "2"; + }} - touch $out/share/pwndbg/.skip-venv - makeWrapper ${gdb}/bin/gdb $out/bin/${pwndbgName} \ - --add-flags "--quiet --early-init-eval-command=\"set auto-load safe-path /\" --command=$out/share/pwndbg/gdbinit.py" - ''); + touch $out/share/pwndbg/.skip-venv + makeWrapper ${gdb}/bin/gdb $out/bin/${pwndbgName} \ + --add-flags "--quiet --early-init-eval-command=\"set auto-load safe-path /\" --command=$out/share/pwndbg/gdbinit.py" + '' + ); - meta = { - pwndbgVenv = pyEnv; - python3 = python3; - gdb = gdb; + meta = { + pwndbgVenv = pyEnv; + python3 = python3; + gdb = gdb; + }; }; - }; in pwndbg diff --git a/nix/pyenv.nix b/nix/pyenv.nix index 6855e8936..fc21cf569 100644 --- a/nix/pyenv.nix +++ b/nix/pyenv.nix @@ -27,16 +27,30 @@ pkgs.poetry2nix.mkPoetryEnv { pt = super.pt.overridePythonAttrs (old: { buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ]; }); - capstone = super.capstone.overridePythonAttrs (old: { - # fix darwin - preBuild = pkgs.lib.optionalString pkgs.stdenv.isDarwin '' - sed -i 's/^IS_APPLE := .*$/IS_APPLE := 1/' ./src/Makefile - ''; - # fix build for aarch64: https://github.com/capstone-engine/capstone/issues/2102 - postPatch = pkgs.lib.optionalString pkgs.stdenv.isLinux '' - substituteInPlace setup.py --replace manylinux1 manylinux2014 - ''; - }); + + capstone = + # capstone=5.0.3 build is broken only in darwin :(, soo we use wheel + if pkgs.stdenv.isDarwin then + super.capstone.override { preferWheel = true; } + else + super.capstone.overridePythonAttrs (old: { + # fix darwin + preBuild = pkgs.lib.optionalString pkgs.stdenv.isDarwin '' + sed -i 's/^IS_APPLE := .*$/IS_APPLE := 1/' ./src/Makefile + ''; + # fix darwin + nativeBuildInputs = + (old.nativeBuildInputs or [ ]) + ++ pkgs.lib.optionals pkgs.stdenv.isDarwin [ + pkgs.cmake + pkgs.fixDarwinDylibNames + ]; + # fix build for aarch64: https://github.com/capstone-engine/capstone/issues/2102 + postPatch = pkgs.lib.optionalString pkgs.stdenv.isLinux '' + substituteInPlace setup.py --replace manylinux1 manylinux2014 + ''; + }); + sortedcontainers-stubs = super.sortedcontainers-stubs.overridePythonAttrs (old: { buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ]; }); diff --git a/pwndbg/aglib/heap/jemalloc.py b/pwndbg/aglib/heap/jemalloc.py index ff8675e7e..327df8bd5 100644 --- a/pwndbg/aglib/heap/jemalloc.py +++ b/pwndbg/aglib/heap/jemalloc.py @@ -1,10 +1,9 @@ from __future__ import annotations -import gdb +from typing import Dict -import pwndbg.gdblib.info -import pwndbg.gdblib.memory -import pwndbg.gdblib.typeinfo +import pwndbg.aglib.memory +import pwndbg.aglib.typeinfo # adapted from jemalloc source 5.3.0 LG_VADDR = 48 @@ -193,24 +192,23 @@ class RTree: def __init__(self, addr: int) -> None: self._addr = addr - rtree_s = pwndbg.gdblib.typeinfo.load("struct rtree_s") - # self._Value = pwndbg.gdblib.memory.poi(emap_s, self._addr) + # self._Value = pwndbg.aglib.memory.poi(emap_s, self._addr) - # self._Value = pwndbg.gdblib.memory.fetch_struct_as_dictionary( + # self._Value = pwndbg.aglib.memory.fetch_struct_as_dictionary( # "rtree_s", self._addr, include_only_fields={"root"} # ) - self._Value = gdb.Value(self._addr).cast(rtree_s.pointer()).dereference() + # pwndbg.aglib.memory + self._Value = pwndbg.aglib.memory.get_typed_pointer_value("struct rtree_s", self._addr) self._extents = None @staticmethod - def get_rtree() -> RTree: + def get_rtree() -> RTree | None: try: - addr = pwndbg.gdblib.info.address("je_arena_emap_global") + addr = pwndbg.dbg.selected_inferior().symbol_address_from_name("je_arena_emap_global") if addr is None: return None - - except gdb.MemoryError: + except pwndbg.dbg_mod.Error: return None return RTree(addr) @@ -232,12 +230,12 @@ class RTree: return ptrbits - cumbits # Can be used to lookup key quickly in cache - def __rtree_leafkey(self, key, level): + def __rtree_leafkey(self, key: int, level: int) -> int: mask = ~((1 << self.__rtree_leaf_maskbits(level)) - 1) # print("mask: ", mask, bin(mask)) return key & mask - def __subkey(self, key, level): + def __subkey(self, key: int, level: int) -> int: """ Return a portion of the key that is used to find the node/leaf in the rtree at a specific level. Source: https://github.com/jemalloc/jemalloc/blob/5b72ac098abce464add567869d082f2097bd59a2/include/jemalloc/internal/rtree.h#L161 @@ -255,15 +253,15 @@ class RTree: def __alignment_addr2base(addr, alignment=64): return addr - (addr - (addr & (~(alignment - 1)))) - def lookup_hard(self, key): + def lookup_hard(self, key: int): """ Lookup the key in the rtree and return the value. How it works: - Jemalloc stores the extent address in the rtree as a node and to find a specific node we need a address key. """ - rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s") - rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s") + rtree_node_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_node_elm_s") + rtree_leaf_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_leaf_elm_s") # Credits: 盏一's jegdb @@ -271,9 +269,10 @@ class RTree: subkey = self.__subkey(key, 1) addr = int(self.root.address) + subkey * rtree_node_elm_s.sizeof - node = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_node_elm_s", addr) - - child_repr: int = node["child"]["repr"] # type: ignore[index] + fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value( + "struct rtree_node_elm_s", addr + ) + child_repr = int(fetched_struct["child"]["repr"]) # on node element, child contains the bits with which we can find another node or leaf element if child_repr == 0: @@ -282,10 +281,12 @@ class RTree: # For subkey 1 subkey = self.__subkey(key, 2) addr = child_repr + subkey * rtree_leaf_elm_s.sizeof - leaf = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_leaf_elm_s", addr) + fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value( + "struct rtree_leaf_elm_s", addr + ) # On leaf element, le_bits contains the virtual memory address bits so we can use it to find the extent address - val: int = leaf["le_bits"]["repr"] # type: ignore[index] + val = int(fetched_struct["le_bits"]["repr"]) if val == 0: return None @@ -325,21 +326,19 @@ class RTree: last_addr = None extent_addresses = [] - rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s") - rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s") + rtree_node_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_node_elm_s") + rtree_leaf_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_leaf_elm_s") max_subkeys = 1 << rtree_levels[RTREE_HEIGHT - 1][0]["bits"] # print("max_subkeys: ", max_subkeys) for i in range(max_subkeys): node_address = int(root.address) + i * rtree_node_elm_s.sizeof - # node = pwndbg.gdblib.memory.poi(rtree_node_elm_s, node) - fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value( + # node = pwndbg.aglib.memory.poi(rtree_node_elm_s, node) + fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value( rtree_node_elm_s, node_address ) - node = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct) - - leaf0: int = node["child"]["repr"] # type: ignore[index] + leaf0 = int(fetched_struct["child"]["repr"]) if leaf0 == 0: continue @@ -349,13 +348,12 @@ class RTree: # level 1 for j in range(max_subkeys): leaf_address = leaf0 + j * rtree_leaf_elm_s.sizeof - # leaf = pwndbg.gdblib.memory.poi(rtree_leaf_elm_s, leaf) - fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value( + # leaf = pwndbg.aglib.memory.poi(rtree_leaf_elm_s, leaf) + fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value( rtree_leaf_elm_s, leaf_address ) - leaf = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct) - - if (val := int(leaf["le_bits"]["repr"])) == 0: # type: ignore[index, arg-type] + val = int(fetched_struct["le_bits"]["repr"]) + if val == 0: continue # print("leaf: ", hex(leaf_address)) @@ -389,7 +387,7 @@ class RTree: self._extents.append(extent_tmp) - except gdb.MemoryError: + except pwndbg.dbg_mod.Error: pass return self._extents @@ -409,8 +407,7 @@ class Extent: self._addr = addr # fetch_struct_as_dictionary does not support union currently - edata_s = pwndbg.gdblib.typeinfo.load("struct edata_s") - self._Value = gdb.Value(self._addr).cast(edata_s.pointer()).dereference() + self._Value = pwndbg.aglib.memory.get_typed_pointer_value("struct edata_s", self._addr) self._bitfields = None @@ -423,14 +420,14 @@ class Extent: return (int(self._Value["e_size_esn"]) >> LG_PAGE) << LG_PAGE @property - def extent_address(self): + def extent_address(self) -> int: """ Address of the extent data structure (not the actual memory). """ return self._addr @property - def allocated_address(self): + def allocated_address(self) -> int: """ Starting address of allocated memory cache-oblivious large allocation alignment: @@ -438,18 +435,18 @@ class Extent: However, the pointer returned to user is randomized between the 'base' and 'base + 4 KiB' (0x1000) range. Source code: https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/arena_inlines_b.h#L505 """ - return self._Value["e_addr"] + return int(self._Value["e_addr"]) @property - def bsize(self): - return self._Value["e_bsize"] + def bsize(self) -> int: + return int(self._Value["e_bsize"]) @property - def bits(self): - return self._Value["e_bits"] + def bits(self) -> int: + return int(self._Value["e_bits"]) @property - def bitfields(self): + def bitfields(self) -> Dict[str, int]: """ Extract bitfields @@ -482,13 +479,13 @@ class Extent: return self._bitfields @property - def state_name(self): + def state_name(self) -> str: state_mapping = ["Active", "Dirty", "Muzzy", "Retained"] return state_mapping[self.bitfields["state"]] @property - def has_slab(self): + def has_slab(self) -> bool: """ Returns True if the extent is used for small size classes. Reference for size in Table 1 at https://jemalloc.net/jemalloc.3.html @@ -497,14 +494,14 @@ class Extent: return self.bitfields["slab"] != 0 @property - def is_free(self): + def is_free(self) -> bool: """ Returns True if the extent is free. """ pass @property - def pai(self): + def pai(self) -> str: """ Page Allocator Interface """ diff --git a/pwndbg/aglib/memory.py b/pwndbg/aglib/memory.py index c2735bbe4..9bdd3b1a5 100644 --- a/pwndbg/aglib/memory.py +++ b/pwndbg/aglib/memory.py @@ -343,6 +343,16 @@ def update_min_addr() -> None: MMAP_MIN_ADDR = 0 if pwndbg.aglib.qemu.is_qemu_kernel() else 0x8000 +def fetch_struct_as_dictionary( + struct_name: str, + struct_address: int | pwndbg.dbg_mod.Value, + include_only_fields: Set[str] | None = None, + exclude_fields: Set[str] | None = None, +) -> GdbDict: + fetched_struct = get_typed_pointer_value("struct " + struct_name, struct_address) + return pack_struct_into_dictionary(fetched_struct, include_only_fields, exclude_fields) + + def pack_struct_into_dictionary( fetched_struct: pwndbg.dbg_mod.Value, include_only_fields: Set[str] | None = None, diff --git a/pwndbg/dbg/gdb.py b/pwndbg/dbg/gdb.py index aa9855bba..0b0ab2a52 100644 --- a/pwndbg/dbg/gdb.py +++ b/pwndbg/dbg/gdb.py @@ -1046,7 +1046,7 @@ class GDBValue(pwndbg.dbg_mod.Value): @override def __getitem__(self, key: str | int) -> pwndbg.dbg_mod.Value: - if self.inner.type.code == gdb.TYPE_CODE_STRUCT and isinstance(key, int): + if isinstance(key, int) and self.inner.type.strip_typedefs().code == gdb.TYPE_CODE_STRUCT: # GDB doesn't normally support indexing fields in a struct by int, # so we nudge it a little. key = self.inner.type.fields()[key]