Fix jemalloc #2502 (#2512)

* add nix fmt

* fix capstone=5.0.3 on darwin

* nix fmt

* nix fmt

* port jemalloc to aglib

* fix aglib gdb value by index

* fix jemalloc typing

* fix jemalloc typing

* jemalloc improve times
pull/2529/head
patryk4815 1 year ago committed by GitHub
parent 233680bd22
commit feeb713396
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -93,5 +93,6 @@
isLLDB = true; isLLDB = true;
} }
); );
formatter = forAllSystems (system: pkgsBySystem.${system}.nixfmt-rfc-style);
}; };
} }

@ -16,7 +16,8 @@ let
{ {
drv ? null, drv ? null,
config ? "nfpm.yaml", config ? "nfpm.yaml",
packager ? null, # apk|deb|rpm|archlinux packager ? null,
# apk|deb|rpm|archlinux
preremove ? null, preremove ? null,
... ...
}@attrs: }@attrs:

@ -1,7 +1,6 @@
# This should be kept in sync with setup-dev.sh and lint.sh requirements # This should be kept in sync with setup-dev.sh and lint.sh requirements
{ {
pkgs ? pkgs ? # If pkgs is not defined, instantiate nixpkgs from locked commit
# If pkgs is not defined, instantiate nixpkgs from locked commit
let let
lock = (builtins.fromJSON (builtins.readFile ./flake.lock)).nodes.nixpkgs.locked; lock = (builtins.fromJSON (builtins.readFile ./flake.lock)).nodes.nixpkgs.locked;
nixpkgs = fetchTarball { nixpkgs = fetchTarball {
@ -17,7 +16,12 @@
}: }:
let let
pyEnv = import ./pyenv.nix { pyEnv = import ./pyenv.nix {
inherit pkgs python3 inputs isLLDB; inherit
pkgs
python3
inputs
isLLDB
;
lib = pkgs.lib; lib = pkgs.lib;
isDev = true; isDev = true;
}; };
@ -26,22 +30,27 @@ in
default = pkgs.mkShell { default = pkgs.mkShell {
NIX_CONFIG = "extra-experimental-features = nix-command flakes repl-flake"; NIX_CONFIG = "extra-experimental-features = nix-command flakes repl-flake";
# Anything not handled by the poetry env # Anything not handled by the poetry env
nativeBuildInputs = (with pkgs; [ nativeBuildInputs =
# from setup-dev.sh (with pkgs; [
nasm # from setup-dev.sh
gcc nasm
curl gcc
gdb curl
parallel gdb
qemu parallel
netcat-openbsd qemu
zig_0_10 # matches setup-dev.sh netcat-openbsd
go zig_0_10 # matches setup-dev.sh
go
pyEnv pyEnv
]) ++ pkgs.lib.optionals isLLDB (with pkgs; [ ])
lldb_19 ++ pkgs.lib.optionals isLLDB (
]); with pkgs;
[
lldb_19
]
);
shellHook = '' shellHook = ''
export PWNDBG_VENV_PATH="PWNDBG_PLEASE_SKIP_VENV" export PWNDBG_VENV_PATH="PWNDBG_PLEASE_SKIP_VENV"
export ZIGPATH="${pkgs.lib.getBin pkgs.zig_0_10}/bin/" export ZIGPATH="${pkgs.lib.getBin pkgs.zig_0_10}/bin/"

@ -27,7 +27,8 @@ let
python3 python3
inputs inputs
isDev isDev
isLLDB; isLLDB
;
lib = pkgs.lib; lib = pkgs.lib;
}; };
@ -40,66 +41,93 @@ let
'' ''
); );
pwndbg = let pwndbg =
pwndbgName = if isLLDB then "pwndbg-lldb" else "pwndbg"; let
in pkgs.stdenv.mkDerivation { pwndbgName = if isLLDB then "pwndbg-lldb" else "pwndbg";
name = pwndbgName; in
version = pwndbgVersion; pkgs.stdenv.mkDerivation {
name = pwndbgName;
version = pwndbgVersion;
src = pkgs.lib.sourceByRegex inputs.pwndbg ([ src = pkgs.lib.sourceByRegex inputs.pwndbg (
"pwndbg" [
"pwndbg/.*" "pwndbg"
] ++ (if isLLDB then [ "pwndbg/.*"
"lldbinit.py" ]
"pwndbg-lldb.py" ++ (
] else [ if isLLDB then
"gdbinit.py" [
])); "lldbinit.py"
"pwndbg-lldb.py"
]
else
[
"gdbinit.py"
]
)
);
nativeBuildInputs = [ pkgs.makeWrapper ]; nativeBuildInputs = [ pkgs.makeWrapper ];
buildInputs = [ pyEnv ]; buildInputs = [ pyEnv ];
installPhase = let installPhase =
fix_init_script = { target, line }: '' let
# Build self-contained init script for lazy loading from vanilla gdb fix_init_script =
# I purposely use insert() so I can re-import during development without having to restart gdb { target, line }:
sed "${line} i import sys, os\n\ ''
sys.path.insert(0, '${pyEnv}/${pyEnv.sitePackages}')\n\ # Build self-contained init script for lazy loading from vanilla gdb
sys.path.insert(0, '$out/share/pwndbg/')\n\ # I purposely use insert() so I can re-import during development without having to restart gdb
os.environ['PATH'] += ':${binPath}'\n" -i ${target} sed "${line} i import sys, os\n\
''; sys.path.insert(0, '${pyEnv}/${pyEnv.sitePackages}')\n\
in (if isLLDB then '' sys.path.insert(0, '$out/share/pwndbg/')\n\
mkdir -p $out/share/pwndbg os.environ['PATH'] += ':${binPath}'\n" -i ${target}
mkdir -p $out/bin '';
in
(
if isLLDB then
''
mkdir -p $out/share/pwndbg
mkdir -p $out/bin
cp -r lldbinit.py pwndbg $out/share/pwndbg cp -r lldbinit.py pwndbg $out/share/pwndbg
cp pwndbg-lldb.py $out/bin/${pwndbgName} cp pwndbg-lldb.py $out/bin/${pwndbgName}
${fix_init_script { target = "$out/bin/${pwndbgName}"; line = "4"; } } ${fix_init_script {
target = "$out/bin/${pwndbgName}";
line = "4";
}}
touch $out/share/pwndbg/.skip-venv touch $out/share/pwndbg/.skip-venv
wrapProgram $out/bin/${pwndbgName} \ wrapProgram $out/bin/${pwndbgName} \
--prefix PATH : ${ pkgs.lib.makeBinPath [ lldb ] } \ --prefix PATH : ${pkgs.lib.makeBinPath [ lldb ]} \
'' + (pkgs.lib.optionalString (!pkgs.stdenv.isDarwin) '' ''
--set LLDB_DEBUGSERVER_PATH ${ pkgs.lib.makeBinPath [ lldb ] }/lldb-server \ + (pkgs.lib.optionalString (!pkgs.stdenv.isDarwin) ''
'') + '' --set LLDB_DEBUGSERVER_PATH ${pkgs.lib.makeBinPath [ lldb ]}/lldb-server \
--set PWNDBG_LLDBINIT_DIR $out/share/pwndbg '')
'' else '' + ''
mkdir -p $out/share/pwndbg --set PWNDBG_LLDBINIT_DIR $out/share/pwndbg
''
else
''
mkdir -p $out/share/pwndbg
cp -r gdbinit.py pwndbg $out/share/pwndbg cp -r gdbinit.py pwndbg $out/share/pwndbg
${fix_init_script { target = "$out/share/pwndbg/gdbinit.py"; line = "2"; } } ${fix_init_script {
target = "$out/share/pwndbg/gdbinit.py";
line = "2";
}}
touch $out/share/pwndbg/.skip-venv touch $out/share/pwndbg/.skip-venv
makeWrapper ${gdb}/bin/gdb $out/bin/${pwndbgName} \ makeWrapper ${gdb}/bin/gdb $out/bin/${pwndbgName} \
--add-flags "--quiet --early-init-eval-command=\"set auto-load safe-path /\" --command=$out/share/pwndbg/gdbinit.py" --add-flags "--quiet --early-init-eval-command=\"set auto-load safe-path /\" --command=$out/share/pwndbg/gdbinit.py"
''); ''
);
meta = { meta = {
pwndbgVenv = pyEnv; pwndbgVenv = pyEnv;
python3 = python3; python3 = python3;
gdb = gdb; gdb = gdb;
};
}; };
};
in in
pwndbg pwndbg

@ -27,16 +27,30 @@ pkgs.poetry2nix.mkPoetryEnv {
pt = super.pt.overridePythonAttrs (old: { pt = super.pt.overridePythonAttrs (old: {
buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ]; buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ];
}); });
capstone = super.capstone.overridePythonAttrs (old: {
# fix darwin capstone =
preBuild = pkgs.lib.optionalString pkgs.stdenv.isDarwin '' # capstone=5.0.3 build is broken only in darwin :(, so we use wheel
sed -i 's/^IS_APPLE := .*$/IS_APPLE := 1/' ./src/Makefile if pkgs.stdenv.isDarwin then
''; super.capstone.override { preferWheel = true; }
# fix build for aarch64: https://github.com/capstone-engine/capstone/issues/2102 else
postPatch = pkgs.lib.optionalString pkgs.stdenv.isLinux '' super.capstone.overridePythonAttrs (old: {
substituteInPlace setup.py --replace manylinux1 manylinux2014 # fix darwin
''; preBuild = pkgs.lib.optionalString pkgs.stdenv.isDarwin ''
}); sed -i 's/^IS_APPLE := .*$/IS_APPLE := 1/' ./src/Makefile
'';
# fix darwin
nativeBuildInputs =
(old.nativeBuildInputs or [ ])
++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
pkgs.cmake
pkgs.fixDarwinDylibNames
];
# fix build for aarch64: https://github.com/capstone-engine/capstone/issues/2102
postPatch = pkgs.lib.optionalString pkgs.stdenv.isLinux ''
substituteInPlace setup.py --replace manylinux1 manylinux2014
'';
});
sortedcontainers-stubs = super.sortedcontainers-stubs.overridePythonAttrs (old: { sortedcontainers-stubs = super.sortedcontainers-stubs.overridePythonAttrs (old: {
buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ]; buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ];
}); });

@ -1,10 +1,9 @@
from __future__ import annotations from __future__ import annotations
import gdb from typing import Dict
import pwndbg.gdblib.info import pwndbg.aglib.memory
import pwndbg.gdblib.memory import pwndbg.aglib.typeinfo
import pwndbg.gdblib.typeinfo
# adapted from jemalloc source 5.3.0 # adapted from jemalloc source 5.3.0
LG_VADDR = 48 LG_VADDR = 48
@ -193,24 +192,23 @@ class RTree:
def __init__(self, addr: int) -> None: def __init__(self, addr: int) -> None:
self._addr = addr self._addr = addr
rtree_s = pwndbg.gdblib.typeinfo.load("struct rtree_s") # self._Value = pwndbg.aglib.memory.poi(emap_s, self._addr)
# self._Value = pwndbg.gdblib.memory.poi(emap_s, self._addr)
# self._Value = pwndbg.gdblib.memory.fetch_struct_as_dictionary( # self._Value = pwndbg.aglib.memory.fetch_struct_as_dictionary(
# "rtree_s", self._addr, include_only_fields={"root"} # "rtree_s", self._addr, include_only_fields={"root"}
# ) # )
self._Value = gdb.Value(self._addr).cast(rtree_s.pointer()).dereference() # pwndbg.aglib.memory
self._Value = pwndbg.aglib.memory.get_typed_pointer_value("struct rtree_s", self._addr)
self._extents = None self._extents = None
@staticmethod @staticmethod
def get_rtree() -> RTree: def get_rtree() -> RTree | None:
try: try:
addr = pwndbg.gdblib.info.address("je_arena_emap_global") addr = pwndbg.dbg.selected_inferior().symbol_address_from_name("je_arena_emap_global")
if addr is None: if addr is None:
return None return None
except pwndbg.dbg_mod.Error:
except gdb.MemoryError:
return None return None
return RTree(addr) return RTree(addr)
@ -232,12 +230,12 @@ class RTree:
return ptrbits - cumbits return ptrbits - cumbits
# Can be used to lookup key quickly in cache # Can be used to lookup key quickly in cache
def __rtree_leafkey(self, key, level): def __rtree_leafkey(self, key: int, level: int) -> int:
mask = ~((1 << self.__rtree_leaf_maskbits(level)) - 1) mask = ~((1 << self.__rtree_leaf_maskbits(level)) - 1)
# print("mask: ", mask, bin(mask)) # print("mask: ", mask, bin(mask))
return key & mask return key & mask
def __subkey(self, key, level): def __subkey(self, key: int, level: int) -> int:
""" """
Return a portion of the key that is used to find the node/leaf in the rtree at a specific level. Return a portion of the key that is used to find the node/leaf in the rtree at a specific level.
Source: https://github.com/jemalloc/jemalloc/blob/5b72ac098abce464add567869d082f2097bd59a2/include/jemalloc/internal/rtree.h#L161 Source: https://github.com/jemalloc/jemalloc/blob/5b72ac098abce464add567869d082f2097bd59a2/include/jemalloc/internal/rtree.h#L161
@ -255,15 +253,15 @@ class RTree:
def __alignment_addr2base(addr, alignment=64): def __alignment_addr2base(addr, alignment=64):
return addr - (addr - (addr & (~(alignment - 1)))) return addr - (addr - (addr & (~(alignment - 1))))
def lookup_hard(self, key): def lookup_hard(self, key: int):
""" """
Lookup the key in the rtree and return the value. Lookup the key in the rtree and return the value.
How it works: How it works:
- Jemalloc stores the extent address in the rtree as a node and to find a specific node we need an address key. - Jemalloc stores the extent address in the rtree as a node and to find a specific node we need an address key.
""" """
rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s") rtree_node_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_node_elm_s")
rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s") rtree_leaf_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_leaf_elm_s")
# Credits: 盏一's jegdb # Credits: 盏一's jegdb
@ -271,9 +269,10 @@ class RTree:
subkey = self.__subkey(key, 1) subkey = self.__subkey(key, 1)
addr = int(self.root.address) + subkey * rtree_node_elm_s.sizeof addr = int(self.root.address) + subkey * rtree_node_elm_s.sizeof
node = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_node_elm_s", addr) fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
"struct rtree_node_elm_s", addr
child_repr: int = node["child"]["repr"] # type: ignore[index] )
child_repr = int(fetched_struct["child"]["repr"])
# on node element, child contains the bits with which we can find another node or leaf element # on node element, child contains the bits with which we can find another node or leaf element
if child_repr == 0: if child_repr == 0:
@ -282,10 +281,12 @@ class RTree:
# For subkey 1 # For subkey 1
subkey = self.__subkey(key, 2) subkey = self.__subkey(key, 2)
addr = child_repr + subkey * rtree_leaf_elm_s.sizeof addr = child_repr + subkey * rtree_leaf_elm_s.sizeof
leaf = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_leaf_elm_s", addr) fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
"struct rtree_leaf_elm_s", addr
)
# On leaf element, le_bits contains the virtual memory address bits so we can use it to find the extent address # On leaf element, le_bits contains the virtual memory address bits so we can use it to find the extent address
val: int = leaf["le_bits"]["repr"] # type: ignore[index] val = int(fetched_struct["le_bits"]["repr"])
if val == 0: if val == 0:
return None return None
@ -325,21 +326,19 @@ class RTree:
last_addr = None last_addr = None
extent_addresses = [] extent_addresses = []
rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s") rtree_node_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_node_elm_s")
rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s") rtree_leaf_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_leaf_elm_s")
max_subkeys = 1 << rtree_levels[RTREE_HEIGHT - 1][0]["bits"] max_subkeys = 1 << rtree_levels[RTREE_HEIGHT - 1][0]["bits"]
# print("max_subkeys: ", max_subkeys) # print("max_subkeys: ", max_subkeys)
for i in range(max_subkeys): for i in range(max_subkeys):
node_address = int(root.address) + i * rtree_node_elm_s.sizeof node_address = int(root.address) + i * rtree_node_elm_s.sizeof
# node = pwndbg.gdblib.memory.poi(rtree_node_elm_s, node) # node = pwndbg.aglib.memory.poi(rtree_node_elm_s, node)
fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value( fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
rtree_node_elm_s, node_address rtree_node_elm_s, node_address
) )
node = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct) leaf0 = int(fetched_struct["child"]["repr"])
leaf0: int = node["child"]["repr"] # type: ignore[index]
if leaf0 == 0: if leaf0 == 0:
continue continue
@ -349,13 +348,12 @@ class RTree:
# level 1 # level 1
for j in range(max_subkeys): for j in range(max_subkeys):
leaf_address = leaf0 + j * rtree_leaf_elm_s.sizeof leaf_address = leaf0 + j * rtree_leaf_elm_s.sizeof
# leaf = pwndbg.gdblib.memory.poi(rtree_leaf_elm_s, leaf) # leaf = pwndbg.aglib.memory.poi(rtree_leaf_elm_s, leaf)
fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value( fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
rtree_leaf_elm_s, leaf_address rtree_leaf_elm_s, leaf_address
) )
leaf = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct) val = int(fetched_struct["le_bits"]["repr"])
if val == 0:
if (val := int(leaf["le_bits"]["repr"])) == 0: # type: ignore[index, arg-type]
continue continue
# print("leaf: ", hex(leaf_address)) # print("leaf: ", hex(leaf_address))
@ -389,7 +387,7 @@ class RTree:
self._extents.append(extent_tmp) self._extents.append(extent_tmp)
except gdb.MemoryError: except pwndbg.dbg_mod.Error:
pass pass
return self._extents return self._extents
@ -409,8 +407,7 @@ class Extent:
self._addr = addr self._addr = addr
# fetch_struct_as_dictionary does not support union currently # fetch_struct_as_dictionary does not support union currently
edata_s = pwndbg.gdblib.typeinfo.load("struct edata_s") self._Value = pwndbg.aglib.memory.get_typed_pointer_value("struct edata_s", self._addr)
self._Value = gdb.Value(self._addr).cast(edata_s.pointer()).dereference()
self._bitfields = None self._bitfields = None
@ -423,14 +420,14 @@ class Extent:
return (int(self._Value["e_size_esn"]) >> LG_PAGE) << LG_PAGE return (int(self._Value["e_size_esn"]) >> LG_PAGE) << LG_PAGE
@property @property
def extent_address(self): def extent_address(self) -> int:
""" """
Address of the extent data structure (not the actual memory). Address of the extent data structure (not the actual memory).
""" """
return self._addr return self._addr
@property @property
def allocated_address(self): def allocated_address(self) -> int:
""" """
Starting address of allocated memory Starting address of allocated memory
cache-oblivious large allocation alignment: cache-oblivious large allocation alignment:
@ -438,18 +435,18 @@ class Extent:
However, the pointer returned to user is randomized between the 'base' and 'base + 4 KiB' (0x1000) range. However, the pointer returned to user is randomized between the 'base' and 'base + 4 KiB' (0x1000) range.
Source code: https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/arena_inlines_b.h#L505 Source code: https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/arena_inlines_b.h#L505
""" """
return self._Value["e_addr"] return int(self._Value["e_addr"])
@property @property
def bsize(self): def bsize(self) -> int:
return self._Value["e_bsize"] return int(self._Value["e_bsize"])
@property @property
def bits(self): def bits(self) -> int:
return self._Value["e_bits"] return int(self._Value["e_bits"])
@property @property
def bitfields(self): def bitfields(self) -> Dict[str, int]:
""" """
Extract bitfields Extract bitfields
@ -482,13 +479,13 @@ class Extent:
return self._bitfields return self._bitfields
@property @property
def state_name(self): def state_name(self) -> str:
state_mapping = ["Active", "Dirty", "Muzzy", "Retained"] state_mapping = ["Active", "Dirty", "Muzzy", "Retained"]
return state_mapping[self.bitfields["state"]] return state_mapping[self.bitfields["state"]]
@property @property
def has_slab(self): def has_slab(self) -> bool:
""" """
Returns True if the extent is used for small size classes. Returns True if the extent is used for small size classes.
Reference for size in Table 1 at https://jemalloc.net/jemalloc.3.html Reference for size in Table 1 at https://jemalloc.net/jemalloc.3.html
@ -497,14 +494,14 @@ class Extent:
return self.bitfields["slab"] != 0 return self.bitfields["slab"] != 0
@property @property
def is_free(self): def is_free(self) -> bool:
""" """
Returns True if the extent is free. Returns True if the extent is free.
""" """
pass pass
@property @property
def pai(self): def pai(self) -> str:
""" """
Page Allocator Interface Page Allocator Interface
""" """

@ -343,6 +343,16 @@ def update_min_addr() -> None:
MMAP_MIN_ADDR = 0 if pwndbg.aglib.qemu.is_qemu_kernel() else 0x8000 MMAP_MIN_ADDR = 0 if pwndbg.aglib.qemu.is_qemu_kernel() else 0x8000
def fetch_struct_as_dictionary(
struct_name: str,
struct_address: int | pwndbg.dbg_mod.Value,
include_only_fields: Set[str] | None = None,
exclude_fields: Set[str] | None = None,
) -> GdbDict:
fetched_struct = get_typed_pointer_value("struct " + struct_name, struct_address)
return pack_struct_into_dictionary(fetched_struct, include_only_fields, exclude_fields)
def pack_struct_into_dictionary( def pack_struct_into_dictionary(
fetched_struct: pwndbg.dbg_mod.Value, fetched_struct: pwndbg.dbg_mod.Value,
include_only_fields: Set[str] | None = None, include_only_fields: Set[str] | None = None,

@ -1046,7 +1046,7 @@ class GDBValue(pwndbg.dbg_mod.Value):
@override @override
def __getitem__(self, key: str | int) -> pwndbg.dbg_mod.Value: def __getitem__(self, key: str | int) -> pwndbg.dbg_mod.Value:
if self.inner.type.code == gdb.TYPE_CODE_STRUCT and isinstance(key, int): if isinstance(key, int) and self.inner.type.strip_typedefs().code == gdb.TYPE_CODE_STRUCT:
# GDB doesn't normally support indexing fields in a struct by int, # GDB doesn't normally support indexing fields in a struct by int,
# so we nudge it a little. # so we nudge it a little.
key = self.inner.type.fields()[key] key = self.inner.type.fields()[key]

Loading…
Cancel
Save