Fix jemalloc #2502 (#2512)

* add nix fmt

* fix capstone=5.0.3 on darwin

* nix fmt

* nix fmt

* port jemalloc to aglib

* fix aglib gdb value by index

* fix jemalloc typing

* fix jemalloc typing

* jemalloc improve times
pull/2529/head
patryk4815 1 year ago committed by GitHub
parent 233680bd22
commit feeb713396
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -93,5 +93,6 @@
isLLDB = true;
}
);
formatter = forAllSystems (system: pkgsBySystem.${system}.nixfmt-rfc-style);
};
}

@ -16,7 +16,8 @@ let
{
drv ? null,
config ? "nfpm.yaml",
packager ? null, # apk|deb|rpm|archlinux
packager ? null,
# apk|deb|rpm|archlinux
preremove ? null,
...
}@attrs:

@ -1,7 +1,6 @@
# This should be kept in sync with setup-dev.sh and lint.sh requirements
{
pkgs ?
# If pkgs is not defined, instantiate nixpkgs from locked commit
pkgs ? # If pkgs is not defined, instantiate nixpkgs from locked commit
let
lock = (builtins.fromJSON (builtins.readFile ./flake.lock)).nodes.nixpkgs.locked;
nixpkgs = fetchTarball {
@ -17,7 +16,12 @@
}:
let
pyEnv = import ./pyenv.nix {
inherit pkgs python3 inputs isLLDB;
inherit
pkgs
python3
inputs
isLLDB
;
lib = pkgs.lib;
isDev = true;
};
@ -26,22 +30,27 @@ in
default = pkgs.mkShell {
NIX_CONFIG = "extra-experimental-features = nix-command flakes repl-flake";
# Anything not handled by the poetry env
nativeBuildInputs = (with pkgs; [
# from setup-dev.sh
nasm
gcc
curl
gdb
parallel
qemu
netcat-openbsd
zig_0_10 # matches setup-dev.sh
go
nativeBuildInputs =
(with pkgs; [
# from setup-dev.sh
nasm
gcc
curl
gdb
parallel
qemu
netcat-openbsd
zig_0_10 # matches setup-dev.sh
go
pyEnv
]) ++ pkgs.lib.optionals isLLDB (with pkgs; [
lldb_19
]);
pyEnv
])
++ pkgs.lib.optionals isLLDB (
with pkgs;
[
lldb_19
]
);
shellHook = ''
export PWNDBG_VENV_PATH="PWNDBG_PLEASE_SKIP_VENV"
export ZIGPATH="${pkgs.lib.getBin pkgs.zig_0_10}/bin/"

@ -27,7 +27,8 @@ let
python3
inputs
isDev
isLLDB;
isLLDB
;
lib = pkgs.lib;
};
@ -40,66 +41,93 @@ let
''
);
pwndbg = let
pwndbgName = if isLLDB then "pwndbg-lldb" else "pwndbg";
in pkgs.stdenv.mkDerivation {
name = pwndbgName;
version = pwndbgVersion;
pwndbg =
let
pwndbgName = if isLLDB then "pwndbg-lldb" else "pwndbg";
in
pkgs.stdenv.mkDerivation {
name = pwndbgName;
version = pwndbgVersion;
src = pkgs.lib.sourceByRegex inputs.pwndbg ([
"pwndbg"
"pwndbg/.*"
] ++ (if isLLDB then [
"lldbinit.py"
"pwndbg-lldb.py"
] else [
"gdbinit.py"
]));
src = pkgs.lib.sourceByRegex inputs.pwndbg (
[
"pwndbg"
"pwndbg/.*"
]
++ (
if isLLDB then
[
"lldbinit.py"
"pwndbg-lldb.py"
]
else
[
"gdbinit.py"
]
)
);
nativeBuildInputs = [ pkgs.makeWrapper ];
buildInputs = [ pyEnv ];
nativeBuildInputs = [ pkgs.makeWrapper ];
buildInputs = [ pyEnv ];
installPhase = let
fix_init_script = { target, line }: ''
# Build self-contained init script for lazy loading from vanilla gdb
# I purposely use insert() so I can re-import during development without having to restart gdb
sed "${line} i import sys, os\n\
sys.path.insert(0, '${pyEnv}/${pyEnv.sitePackages}')\n\
sys.path.insert(0, '$out/share/pwndbg/')\n\
os.environ['PATH'] += ':${binPath}'\n" -i ${target}
'';
in (if isLLDB then ''
mkdir -p $out/share/pwndbg
mkdir -p $out/bin
installPhase =
let
fix_init_script =
{ target, line }:
''
# Build self-contained init script for lazy loading from vanilla gdb
# I purposely use insert() so I can re-import during development without having to restart gdb
sed "${line} i import sys, os\n\
sys.path.insert(0, '${pyEnv}/${pyEnv.sitePackages}')\n\
sys.path.insert(0, '$out/share/pwndbg/')\n\
os.environ['PATH'] += ':${binPath}'\n" -i ${target}
'';
in
(
if isLLDB then
''
mkdir -p $out/share/pwndbg
mkdir -p $out/bin
cp -r lldbinit.py pwndbg $out/share/pwndbg
cp pwndbg-lldb.py $out/bin/${pwndbgName}
cp -r lldbinit.py pwndbg $out/share/pwndbg
cp pwndbg-lldb.py $out/bin/${pwndbgName}
${fix_init_script { target = "$out/bin/${pwndbgName}"; line = "4"; } }
${fix_init_script {
target = "$out/bin/${pwndbgName}";
line = "4";
}}
touch $out/share/pwndbg/.skip-venv
wrapProgram $out/bin/${pwndbgName} \
--prefix PATH : ${ pkgs.lib.makeBinPath [ lldb ] } \
'' + (pkgs.lib.optionalString (!pkgs.stdenv.isDarwin) ''
--set LLDB_DEBUGSERVER_PATH ${ pkgs.lib.makeBinPath [ lldb ] }/lldb-server \
'') + ''
--set PWNDBG_LLDBINIT_DIR $out/share/pwndbg
'' else ''
mkdir -p $out/share/pwndbg
touch $out/share/pwndbg/.skip-venv
wrapProgram $out/bin/${pwndbgName} \
--prefix PATH : ${pkgs.lib.makeBinPath [ lldb ]} \
''
+ (pkgs.lib.optionalString (!pkgs.stdenv.isDarwin) ''
--set LLDB_DEBUGSERVER_PATH ${pkgs.lib.makeBinPath [ lldb ]}/lldb-server \
'')
+ ''
--set PWNDBG_LLDBINIT_DIR $out/share/pwndbg
''
else
''
mkdir -p $out/share/pwndbg
cp -r gdbinit.py pwndbg $out/share/pwndbg
${fix_init_script { target = "$out/share/pwndbg/gdbinit.py"; line = "2"; } }
cp -r gdbinit.py pwndbg $out/share/pwndbg
${fix_init_script {
target = "$out/share/pwndbg/gdbinit.py";
line = "2";
}}
touch $out/share/pwndbg/.skip-venv
makeWrapper ${gdb}/bin/gdb $out/bin/${pwndbgName} \
--add-flags "--quiet --early-init-eval-command=\"set auto-load safe-path /\" --command=$out/share/pwndbg/gdbinit.py"
'');
touch $out/share/pwndbg/.skip-venv
makeWrapper ${gdb}/bin/gdb $out/bin/${pwndbgName} \
--add-flags "--quiet --early-init-eval-command=\"set auto-load safe-path /\" --command=$out/share/pwndbg/gdbinit.py"
''
);
meta = {
pwndbgVenv = pyEnv;
python3 = python3;
gdb = gdb;
meta = {
pwndbgVenv = pyEnv;
python3 = python3;
gdb = gdb;
};
};
};
in
pwndbg

@ -27,16 +27,30 @@ pkgs.poetry2nix.mkPoetryEnv {
pt = super.pt.overridePythonAttrs (old: {
buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ];
});
capstone = super.capstone.overridePythonAttrs (old: {
# fix darwin
preBuild = pkgs.lib.optionalString pkgs.stdenv.isDarwin ''
sed -i 's/^IS_APPLE := .*$/IS_APPLE := 1/' ./src/Makefile
'';
# fix build for aarch64: https://github.com/capstone-engine/capstone/issues/2102
postPatch = pkgs.lib.optionalString pkgs.stdenv.isLinux ''
substituteInPlace setup.py --replace manylinux1 manylinux2014
'';
});
capstone =
# capstone=5.0.3 fails to build from source only on darwin, so we use the prebuilt wheel there
if pkgs.stdenv.isDarwin then
super.capstone.override { preferWheel = true; }
else
super.capstone.overridePythonAttrs (old: {
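# fix darwin (NOTE: this branch is only taken when not on darwin, so the isDarwin-guarded string below is always empty)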
preBuild = pkgs.lib.optionalString pkgs.stdenv.isDarwin ''
sed -i 's/^IS_APPLE := .*$/IS_APPLE := 1/' ./src/Makefile
'';
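# fix darwin (NOTE: this branch is only taken when not on darwin, so the isDarwin-guarded list below adds nothing)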
nativeBuildInputs =
(old.nativeBuildInputs or [ ])
++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
pkgs.cmake
pkgs.fixDarwinDylibNames
];
# fix build for aarch64: https://github.com/capstone-engine/capstone/issues/2102
postPatch = pkgs.lib.optionalString pkgs.stdenv.isLinux ''
substituteInPlace setup.py --replace manylinux1 manylinux2014
'';
});
sortedcontainers-stubs = super.sortedcontainers-stubs.overridePythonAttrs (old: {
buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry-core ];
});

@ -1,10 +1,9 @@
from __future__ import annotations
import gdb
from typing import Dict
import pwndbg.gdblib.info
import pwndbg.gdblib.memory
import pwndbg.gdblib.typeinfo
import pwndbg.aglib.memory
import pwndbg.aglib.typeinfo
# adapted from jemalloc source 5.3.0
LG_VADDR = 48
@ -193,24 +192,23 @@ class RTree:
def __init__(self, addr: int) -> None:
self._addr = addr
rtree_s = pwndbg.gdblib.typeinfo.load("struct rtree_s")
# self._Value = pwndbg.gdblib.memory.poi(emap_s, self._addr)
# self._Value = pwndbg.aglib.memory.poi(emap_s, self._addr)
# self._Value = pwndbg.gdblib.memory.fetch_struct_as_dictionary(
# self._Value = pwndbg.aglib.memory.fetch_struct_as_dictionary(
# "rtree_s", self._addr, include_only_fields={"root"}
# )
self._Value = gdb.Value(self._addr).cast(rtree_s.pointer()).dereference()
# pwndbg.aglib.memory
self._Value = pwndbg.aglib.memory.get_typed_pointer_value("struct rtree_s", self._addr)
self._extents = None
@staticmethod
def get_rtree() -> RTree:
def get_rtree() -> RTree | None:
try:
addr = pwndbg.gdblib.info.address("je_arena_emap_global")
addr = pwndbg.dbg.selected_inferior().symbol_address_from_name("je_arena_emap_global")
if addr is None:
return None
except gdb.MemoryError:
except pwndbg.dbg_mod.Error:
return None
return RTree(addr)
@ -232,12 +230,12 @@ class RTree:
return ptrbits - cumbits
# Can be used to lookup key quickly in cache
def __rtree_leafkey(self, key, level):
def __rtree_leafkey(self, key: int, level: int) -> int:
mask = ~((1 << self.__rtree_leaf_maskbits(level)) - 1)
# print("mask: ", mask, bin(mask))
return key & mask
def __subkey(self, key, level):
def __subkey(self, key: int, level: int) -> int:
"""
Return a portion of the key that is used to find the node/leaf in the rtree at a specific level.
Source: https://github.com/jemalloc/jemalloc/blob/5b72ac098abce464add567869d082f2097bd59a2/include/jemalloc/internal/rtree.h#L161
@ -255,15 +253,15 @@ class RTree:
def __alignment_addr2base(addr, alignment=64):
return addr - (addr - (addr & (~(alignment - 1))))
def lookup_hard(self, key):
def lookup_hard(self, key: int):
"""
Lookup the key in the rtree and return the value.
How it works:
- Jemalloc stores the extent address in the rtree as a node, and to find a specific node we need an address key.
"""
rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s")
rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s")
rtree_node_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_node_elm_s")
rtree_leaf_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_leaf_elm_s")
# Credits: 盏一's jegdb
@ -271,9 +269,10 @@ class RTree:
subkey = self.__subkey(key, 1)
addr = int(self.root.address) + subkey * rtree_node_elm_s.sizeof
node = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_node_elm_s", addr)
child_repr: int = node["child"]["repr"] # type: ignore[index]
fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
"struct rtree_node_elm_s", addr
)
child_repr = int(fetched_struct["child"]["repr"])
# on node element, child contains the bits with which we can find another node or leaf element
if child_repr == 0:
@ -282,10 +281,12 @@ class RTree:
# For subkey 1
subkey = self.__subkey(key, 2)
addr = child_repr + subkey * rtree_leaf_elm_s.sizeof
leaf = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_leaf_elm_s", addr)
fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
"struct rtree_leaf_elm_s", addr
)
# On leaf element, le_bits contains the virtual memory address bits so we can use it to find the extent address
val: int = leaf["le_bits"]["repr"] # type: ignore[index]
val = int(fetched_struct["le_bits"]["repr"])
if val == 0:
return None
@ -325,21 +326,19 @@ class RTree:
last_addr = None
extent_addresses = []
rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s")
rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s")
rtree_node_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_node_elm_s")
rtree_leaf_elm_s = pwndbg.aglib.typeinfo.load("struct rtree_leaf_elm_s")
max_subkeys = 1 << rtree_levels[RTREE_HEIGHT - 1][0]["bits"]
# print("max_subkeys: ", max_subkeys)
for i in range(max_subkeys):
node_address = int(root.address) + i * rtree_node_elm_s.sizeof
# node = pwndbg.gdblib.memory.poi(rtree_node_elm_s, node)
fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value(
# node = pwndbg.aglib.memory.poi(rtree_node_elm_s, node)
fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
rtree_node_elm_s, node_address
)
node = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct)
leaf0: int = node["child"]["repr"] # type: ignore[index]
leaf0 = int(fetched_struct["child"]["repr"])
if leaf0 == 0:
continue
@ -349,13 +348,12 @@ class RTree:
# level 1
for j in range(max_subkeys):
leaf_address = leaf0 + j * rtree_leaf_elm_s.sizeof
# leaf = pwndbg.gdblib.memory.poi(rtree_leaf_elm_s, leaf)
fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value(
# leaf = pwndbg.aglib.memory.poi(rtree_leaf_elm_s, leaf)
fetched_struct = pwndbg.aglib.memory.get_typed_pointer_value(
rtree_leaf_elm_s, leaf_address
)
leaf = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct)
if (val := int(leaf["le_bits"]["repr"])) == 0: # type: ignore[index, arg-type]
val = int(fetched_struct["le_bits"]["repr"])
if val == 0:
continue
# print("leaf: ", hex(leaf_address))
@ -389,7 +387,7 @@ class RTree:
self._extents.append(extent_tmp)
except gdb.MemoryError:
except pwndbg.dbg_mod.Error:
pass
return self._extents
@ -409,8 +407,7 @@ class Extent:
self._addr = addr
# fetch_struct_as_dictionary does not currently support unions
edata_s = pwndbg.gdblib.typeinfo.load("struct edata_s")
self._Value = gdb.Value(self._addr).cast(edata_s.pointer()).dereference()
self._Value = pwndbg.aglib.memory.get_typed_pointer_value("struct edata_s", self._addr)
self._bitfields = None
@ -423,14 +420,14 @@ class Extent:
return (int(self._Value["e_size_esn"]) >> LG_PAGE) << LG_PAGE
@property
def extent_address(self):
def extent_address(self) -> int:
"""
Address of the extent data structure (not the actual memory).
"""
return self._addr
@property
def allocated_address(self):
def allocated_address(self) -> int:
"""
Starting address of allocated memory
cache-oblivious large allocation alignment:
@ -438,18 +435,18 @@ class Extent:
However, the pointer returned to the user is randomized within the range from 'base' to 'base + 4 KiB' (0x1000).
Source code: https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/arena_inlines_b.h#L505
"""
return self._Value["e_addr"]
return int(self._Value["e_addr"])
@property
def bsize(self):
return self._Value["e_bsize"]
def bsize(self) -> int:
return int(self._Value["e_bsize"])
@property
def bits(self):
return self._Value["e_bits"]
def bits(self) -> int:
return int(self._Value["e_bits"])
@property
def bitfields(self):
def bitfields(self) -> Dict[str, int]:
"""
Extract bitfields
@ -482,13 +479,13 @@ class Extent:
return self._bitfields
@property
def state_name(self):
def state_name(self) -> str:
state_mapping = ["Active", "Dirty", "Muzzy", "Retained"]
return state_mapping[self.bitfields["state"]]
@property
def has_slab(self):
def has_slab(self) -> bool:
"""
Returns True if the extent is used for small size classes.
Reference for size in Table 1 at https://jemalloc.net/jemalloc.3.html
@ -497,14 +494,14 @@ class Extent:
return self.bitfields["slab"] != 0
@property
def is_free(self):
def is_free(self) -> bool:
"""
Returns True if the extent is free.
"""
pass
@property
def pai(self):
def pai(self) -> str:
"""
Page Allocator Interface
"""

@ -343,6 +343,16 @@ def update_min_addr() -> None:
MMAP_MIN_ADDR = 0 if pwndbg.aglib.qemu.is_qemu_kernel() else 0x8000
def fetch_struct_as_dictionary(
    struct_name: str,
    struct_address: int | pwndbg.dbg_mod.Value,
    include_only_fields: Set[str] | None = None,
    exclude_fields: Set[str] | None = None,
) -> GdbDict:
    """
    Fetch a struct at an address and pack it with pack_struct_into_dictionary.

    Arguments:
        struct_name: Struct name without the leading "struct " keyword; the
            keyword is prepended here before the type lookup.
        struct_address: Address (or debugger value) handed to
            get_typed_pointer_value as the location of the struct.
        include_only_fields: Forwarded unchanged to pack_struct_into_dictionary.
        exclude_fields: Forwarded unchanged to pack_struct_into_dictionary.

    Returns:
        Whatever pack_struct_into_dictionary produces for the fetched value.
    """
    # Callers pass the bare name (e.g. "rtree_s"); the type lookup gets "struct rtree_s".
    qualified_name = "struct " + struct_name
    typed_value = get_typed_pointer_value(qualified_name, struct_address)
    return pack_struct_into_dictionary(typed_value, include_only_fields, exclude_fields)
def pack_struct_into_dictionary(
fetched_struct: pwndbg.dbg_mod.Value,
include_only_fields: Set[str] | None = None,

@ -1046,7 +1046,7 @@ class GDBValue(pwndbg.dbg_mod.Value):
@override
def __getitem__(self, key: str | int) -> pwndbg.dbg_mod.Value:
if self.inner.type.code == gdb.TYPE_CODE_STRUCT and isinstance(key, int):
if isinstance(key, int) and self.inner.type.strip_typedefs().code == gdb.TYPE_CODE_STRUCT:
# GDB doesn't normally support indexing fields in a struct by int,
# so we nudge it a little.
key = self.inner.type.fields()[key]

Loading…
Cancel
Save