mirror of https://github.com/pwndbg/pwndbg.git
Basic jemalloc command for printing arenas info with bin (#2176)
* Basic jemalloc arenas info command Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * linter Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Hex address Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Added jemalloc to dev setup script and WIP code foor rtree parsing Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Lint changes Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Lint changes 2 Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Removed old commands, added jemalloc related class to generate bins and extent data Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Added RTree class Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Uncommented print lines for testing Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Added mask function from jemalloc Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Added heap command Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * First jemalloc test - experiment Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * some refactoring and tests Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * utilizing debugger-agnostic functions Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * use new api where possible Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Test run pre mypy fixes Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * minor fix for makefile Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Use regex match for address in test find extent Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * minor test change with lowered allocated memory Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Don't hardcode address for ptr in test Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Fix ptr not in breakpoint in test Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Cache oblivious adds 4KiB to allocations Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Corrected size in test Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Remove duplicate extents while parsing rtree Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Skip heap test Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * More explanation Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Added State name to extent info Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Added more comments Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Details about the lookup function Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Fixed double printing issue and some extents missing in heap Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Add check for addr alignment in lookup hard Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Removed prints for testing Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> * Update .devcontainer/devcontainer.json * Move jemalloc.py aglib/heap/jemalloc.py * Update test_heap.py: fix jemalloc support string * Update jemalloc.py: fix mypy issues --------- Signed-off-by: Chirag Aggarwal <thechiragaggarwal@gmail.com> Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com>pull/2492/head
parent
b84a66f133
commit
3ecca0fc1e
@ -0,0 +1,551 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import gdb
|
||||
|
||||
import pwndbg.gdblib.info
|
||||
import pwndbg.gdblib.memory
|
||||
import pwndbg.gdblib.typeinfo
|
||||
|
||||
# adapted from jemalloc source 5.3.0
|
||||
LG_VADDR = 48
|
||||
LG_PAGE = 12
|
||||
# https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/jemalloc_internal_types.h#L42
|
||||
MALLOCX_ARENA_BITS = 12
|
||||
# https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/jemalloc_defs.h.in#L51
|
||||
LG_SIZEOF_PTR = 3
|
||||
|
||||
RTREE_NHIB = (1 << (LG_SIZEOF_PTR + 3)) - LG_VADDR # Number of high insignificant bits
|
||||
RTREE_NLIB = LG_PAGE # Number of low insigificant bits
|
||||
RTREE_NSB = LG_VADDR - RTREE_NLIB # Number of significant bits
|
||||
|
||||
# Number of levels in radix tree
|
||||
if RTREE_NSB <= 10:
|
||||
RTREE_HEIGHT = 1
|
||||
elif RTREE_NSB <= 36:
|
||||
RTREE_HEIGHT = 2
|
||||
elif RTREE_NSB <= 52:
|
||||
RTREE_HEIGHT = 3
|
||||
else:
|
||||
raise ValueError("Unsupported number of significant virtual address bits")
|
||||
|
||||
|
||||
# TODO: RTREE_LEAF_COMPACT should be enabled otherwise rtree_leaf_elm_s would change
|
||||
|
||||
# TODO: Move to relevant place
|
||||
# https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/edata.h#L145
|
||||
|
||||
|
||||
def mask(current_field_width, current_field_shift):
|
||||
return ((1 << current_field_width) - 1) << current_field_shift
|
||||
|
||||
|
||||
# For size class related explanation and calculations, refer to https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/sc.h#L8
|
||||
|
||||
LG_QUANTUM = 4 # LG_QUANTUM ensures correct platform alignment and necessary to ensure we never return improperly aligned memory
|
||||
|
||||
SC_LG_TINY_MIN = 3
|
||||
SC_NTINY = (
|
||||
LG_QUANTUM - SC_LG_TINY_MIN
|
||||
) # Number of tiny size classes for alloations smaller than (1 << LG_QUANTUM)
|
||||
|
||||
# Size classes
|
||||
SC_LG_NGROUP = 2 # Number of size classes group
|
||||
SC_NGROUP = (
|
||||
1 << SC_LG_NGROUP
|
||||
) # Number of size classes in each group, equally spaced in the range, so that * each one covers allocations for base / SC_NGROUP possible allocation sizes
|
||||
SC_NPSEUDO = SC_NGROUP
|
||||
SC_PTR_BITS = (1 << LG_SIZEOF_PTR) * 8
|
||||
SC_LG_BASE_MAX = SC_PTR_BITS - 2
|
||||
SC_LG_FIRST_REGULAR_BASE = LG_QUANTUM + SC_LG_NGROUP
|
||||
SC_NREGULAR = SC_NGROUP * (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1
|
||||
|
||||
SC_NSIZES = SC_NTINY + SC_NPSEUDO + SC_NREGULAR
|
||||
|
||||
SC_LG_SLAB_MAXREGS = LG_PAGE - SC_LG_TINY_MIN
|
||||
|
||||
|
||||
# Source: https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/bit_util.h#L400-L419
|
||||
def lg_floor_1(x):
|
||||
return 0
|
||||
|
||||
|
||||
def lg_floor_2(x):
|
||||
return lg_floor_1(x) if x < (1 << 1) else 1 + lg_floor_1(x >> 1)
|
||||
|
||||
|
||||
def lg_floor_4(x):
|
||||
return lg_floor_2(x) if x < (1 << 2) else 2 + lg_floor_2(x >> 2)
|
||||
|
||||
|
||||
def lg_floor_8(x):
|
||||
return lg_floor_4(x) if x < (1 << 4) else 4 + lg_floor_4(x >> 4)
|
||||
|
||||
|
||||
def lg_floor_16(x):
|
||||
return lg_floor_8(x) if x < (1 << 8) else 8 + lg_floor_8(x >> 8)
|
||||
|
||||
|
||||
def lg_floor_32(x):
|
||||
return lg_floor_16(x) if x < (1 << 16) else 16 + lg_floor_16(x >> 16)
|
||||
|
||||
|
||||
def lg_floor_64(x):
|
||||
return lg_floor_32(x) if x < (1 << 32) else 32 + lg_floor_32(x >> 32)
|
||||
|
||||
|
||||
def lg_floor(x):
|
||||
return lg_floor_32(x) if LG_SIZEOF_PTR == 2 else lg_floor_64(x)
|
||||
|
||||
|
||||
def lg_ceil(x):
|
||||
return lg_floor(x) + (0 if (x & (x - 1)) == 0 else 1)
|
||||
|
||||
|
||||
# Arena width and mask definitions
|
||||
EDATA_BITS_ARENA_WIDTH = MALLOCX_ARENA_BITS
|
||||
EDATA_BITS_ARENA_SHIFT = 0
|
||||
EDATA_BITS_ARENA_MASK = mask(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT)
|
||||
|
||||
# Slab width and mask definitions
|
||||
EDATA_BITS_SLAB_WIDTH = 1
|
||||
EDATA_BITS_SLAB_SHIFT = EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT
|
||||
EDATA_BITS_SLAB_MASK = mask(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT)
|
||||
|
||||
# Committed width and mask definitions
|
||||
EDATA_BITS_COMMITTED_WIDTH = 1
|
||||
EDATA_BITS_COMMITTED_SHIFT = EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT
|
||||
EDATA_BITS_COMMITTED_MASK = mask(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT)
|
||||
|
||||
# PAI width and mask definitions
|
||||
EDATA_BITS_PAI_WIDTH = 1
|
||||
EDATA_BITS_PAI_SHIFT = EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT
|
||||
EDATA_BITS_PAI_MASK = mask(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT)
|
||||
|
||||
# Zeroed width and mask definitions
|
||||
EDATA_BITS_ZEROED_WIDTH = 1
|
||||
EDATA_BITS_ZEROED_SHIFT = EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT
|
||||
EDATA_BITS_ZEROED_MASK = mask(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT)
|
||||
|
||||
# Guarded width and mask definitions
|
||||
EDATA_BITS_GUARDED_WIDTH = 1
|
||||
EDATA_BITS_GUARDED_SHIFT = EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT
|
||||
EDATA_BITS_GUARDED_MASK = mask(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT)
|
||||
|
||||
# State width and mask definitions
|
||||
EDATA_BITS_STATE_WIDTH = 3
|
||||
EDATA_BITS_STATE_SHIFT = EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT
|
||||
EDATA_BITS_STATE_MASK = mask(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT)
|
||||
|
||||
EDATA_BITS_SZIND_WIDTH = lg_ceil(SC_NSIZES)
|
||||
EDATA_BITS_SZIND_SHIFT = EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT
|
||||
EDATA_BITS_SZIND_MASK = mask(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT)
|
||||
|
||||
# Nfree width and mask definitions
|
||||
EDATA_BITS_NFREE_WIDTH = SC_LG_SLAB_MAXREGS + 1
|
||||
EDATA_BITS_NFREE_SHIFT = EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT
|
||||
EDATA_BITS_NFREE_MASK = mask(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT)
|
||||
|
||||
# Binshard width and mask definitions
|
||||
EDATA_BITS_BINSHARD_WIDTH = 6
|
||||
EDATA_BITS_BINSHARD_SHIFT = EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT
|
||||
EDATA_BITS_BINSHARD_MASK = mask(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT)
|
||||
|
||||
# Is head width and mask definitions
|
||||
EDATA_BITS_IS_HEAD_WIDTH = 1
|
||||
EDATA_BITS_IS_HEAD_SHIFT = EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT
|
||||
EDATA_BITS_IS_HEAD_MASK = mask(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT)
|
||||
|
||||
# In RTree, Each level distinguishes a certain number of bits from the key, which helps in narrowing down the search space
|
||||
# bits: how many bits have been used at that particular level (Number of key bits distinguished by this level)
|
||||
# cumbits: how many bits in total have been used up to that level (Cumulative number of key bits distinguished by traversing to corresponding tree level)
|
||||
rtree_levels = [
|
||||
# for height == 1
|
||||
[{"bits": RTREE_NSB, "cumbits": RTREE_NHIB + RTREE_NSB}],
|
||||
# for height == 2
|
||||
[
|
||||
{"bits": RTREE_NSB // 2, "cumbits": RTREE_NHIB + RTREE_NSB // 2},
|
||||
{"bits": RTREE_NSB // 2 + RTREE_NSB % 2, "cumbits": RTREE_NHIB + RTREE_NSB},
|
||||
],
|
||||
# for height == 3
|
||||
[
|
||||
{"bits": RTREE_NSB // 3, "cumbits": RTREE_NHIB + RTREE_NSB // 3},
|
||||
{
|
||||
"bits": RTREE_NSB // 3 + RTREE_NSB % 3 // 2,
|
||||
"cumbits": RTREE_NHIB + RTREE_NSB // 3 * 2 + RTREE_NSB % 3 // 2,
|
||||
},
|
||||
{
|
||||
"bits": RTREE_NSB // 3 + RTREE_NSB % 3 - RTREE_NSB % 3 // 2,
|
||||
"cumbits": RTREE_NHIB + RTREE_NSB,
|
||||
},
|
||||
],
|
||||
]
|
||||
|
||||
|
||||
class RTree:
|
||||
"""
|
||||
RTree is used by jemalloc to keep track of extents that are allocated by jemalloc.
|
||||
Since extent data is not stored in a doubly linked list, rtree is used to find the extent belonging to a pointer that is being freed.
|
||||
Implementation of rtree is similar to Linux Radix tree: https://lwn.net/Articles/175432/
|
||||
"""
|
||||
|
||||
# TODO: Check rtee_ctx cache in
|
||||
# tsd_nominal_tsds.qlh_first.cant_access_tsd_items_directly_use_a_getter_or_setter_rtree_ctx.cache
|
||||
def __init__(self, addr: int) -> None:
|
||||
self._addr = addr
|
||||
|
||||
rtree_s = pwndbg.gdblib.typeinfo.load("struct rtree_s")
|
||||
# self._Value = pwndbg.gdblib.memory.poi(emap_s, self._addr)
|
||||
|
||||
# self._Value = pwndbg.gdblib.memory.fetch_struct_as_dictionary(
|
||||
# "rtree_s", self._addr, include_only_fields={"root"}
|
||||
# )
|
||||
self._Value = gdb.Value(self._addr).cast(rtree_s.pointer()).dereference()
|
||||
|
||||
self._extents = None
|
||||
|
||||
@staticmethod
|
||||
def get_rtree() -> RTree:
|
||||
try:
|
||||
addr = pwndbg.gdblib.info.address("je_arena_emap_global")
|
||||
if addr is None:
|
||||
return None
|
||||
|
||||
except gdb.MemoryError:
|
||||
return None
|
||||
|
||||
return RTree(addr)
|
||||
|
||||
@property
|
||||
def root(self):
|
||||
return self._Value["root"]
|
||||
|
||||
# from include/jemalloc/internal/rtree.h
|
||||
# converted implementation of rtree_leafkey
|
||||
def __rtree_leaf_maskbits(self, level):
|
||||
ptrbits = 1 << (LG_SIZEOF_PTR + 3)
|
||||
# print("ptrbits: ", ptrbits, bin(ptrbits))
|
||||
cumbits = (
|
||||
rtree_levels[RTREE_HEIGHT - 1][level - 1]["cumbits"]
|
||||
- rtree_levels[RTREE_HEIGHT - 1][level - 1]["bits"]
|
||||
)
|
||||
# print("cumbits: ", cumbits, bin(cumbits))
|
||||
return ptrbits - cumbits
|
||||
|
||||
# Can be used to lookup key quickly in cache
|
||||
def __rtree_leafkey(self, key, level):
|
||||
mask = ~((1 << self.__rtree_leaf_maskbits(level)) - 1)
|
||||
# print("mask: ", mask, bin(mask))
|
||||
return key & mask
|
||||
|
||||
def __subkey(self, key, level):
|
||||
"""
|
||||
Return a portion of the key that is used to find the node/leaf in the rtree at a specific level.
|
||||
Source: https://github.com/jemalloc/jemalloc/blob/5b72ac098abce464add567869d082f2097bd59a2/include/jemalloc/internal/rtree.h#L161
|
||||
"""
|
||||
|
||||
ptrbits = 1 << (LG_SIZEOF_PTR + 3)
|
||||
cumbits = rtree_levels[RTREE_HEIGHT - 1][level - 1]["cumbits"]
|
||||
shiftbits = ptrbits - cumbits
|
||||
maskbits = rtree_levels[RTREE_HEIGHT - 1][level - 1]["bits"]
|
||||
mask = (1 << maskbits) - 1
|
||||
|
||||
return (key >> shiftbits) & mask
|
||||
|
||||
@staticmethod
|
||||
def __alignment_addr2base(addr, alignment=64):
|
||||
return addr - (addr - (addr & (~(alignment - 1))))
|
||||
|
||||
def lookup_hard(self, key):
|
||||
"""
|
||||
Lookup the key in the rtree and return the value.
|
||||
|
||||
How it works:
|
||||
- Jemalloc stores the extent address in the rtree as a node and to find a specific node we need a address key.
|
||||
"""
|
||||
rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s")
|
||||
rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s")
|
||||
|
||||
# Credits: 盏一's jegdb
|
||||
|
||||
# For subkey 0
|
||||
subkey = self.__subkey(key, 1)
|
||||
|
||||
addr = int(self.root.address) + subkey * rtree_node_elm_s.sizeof
|
||||
node = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_node_elm_s", addr)
|
||||
|
||||
child_repr: int = node["child"]["repr"] # type: ignore[index]
|
||||
|
||||
# on node element, child contains the bits with which we can find another node or leaf element
|
||||
if child_repr == 0:
|
||||
return None
|
||||
|
||||
# For subkey 1
|
||||
subkey = self.__subkey(key, 2)
|
||||
addr = child_repr + subkey * rtree_leaf_elm_s.sizeof
|
||||
leaf = pwndbg.gdblib.memory.fetch_struct_as_dictionary("rtree_leaf_elm_s", addr)
|
||||
|
||||
# On leaf element, le_bits contains the virtual memory address bits so we can use it to find the extent address
|
||||
val: int = leaf["le_bits"]["repr"] # type: ignore[index]
|
||||
if val == 0:
|
||||
return None
|
||||
|
||||
# In this function, we are trying to find the extent address given the address of memory block
|
||||
# that this extent is managing (which is represented by edata->e_addr in the extent structure)
|
||||
|
||||
# e_addr is 64 bits but
|
||||
# e_addr is also page (4096) aligned which means last 12 bits are zero and therefore unused
|
||||
# In rtree, each layer can be accessed using bits 0-16, 17-33 and 34-51
|
||||
# When height of rtree is 3, level 1 can be accessed using bits 0-16, and so on for level 2 and 3
|
||||
# When the height is 2, 0-15 bits are unused and level 1 can be accessed using bits 16-33 and level 2 using 34-51
|
||||
|
||||
ls = (val << RTREE_NHIB) & ((2**64) - 1)
|
||||
ptr = ((ls >> RTREE_NHIB) >> 1) << 1
|
||||
|
||||
if ptr == 0:
|
||||
return None
|
||||
|
||||
# return Extent(ptr)
|
||||
extent = Extent(ptr)
|
||||
if extent.size == 0:
|
||||
ptr = RTree.__alignment_addr2base(ptr)
|
||||
extent_tmp = Extent(ptr)
|
||||
if extent_tmp.size != 0:
|
||||
return extent_tmp
|
||||
|
||||
return extent
|
||||
|
||||
@property
|
||||
def extents(self):
|
||||
# NOTE: Generating whole extents list is slow as it requires parsing whole rtree
|
||||
|
||||
if self._extents is None: # TODO: handling cache on extents changes
|
||||
self._extents = []
|
||||
try:
|
||||
root = self.root
|
||||
last_addr = None
|
||||
extent_addresses = []
|
||||
|
||||
rtree_node_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_node_elm_s")
|
||||
rtree_leaf_elm_s = pwndbg.gdblib.typeinfo.load("struct rtree_leaf_elm_s")
|
||||
|
||||
max_subkeys = 1 << rtree_levels[RTREE_HEIGHT - 1][0]["bits"]
|
||||
# print("max_subkeys: ", max_subkeys)
|
||||
|
||||
for i in range(max_subkeys):
|
||||
node_address = int(root.address) + i * rtree_node_elm_s.sizeof
|
||||
# node = pwndbg.gdblib.memory.poi(rtree_node_elm_s, node)
|
||||
fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value(
|
||||
rtree_node_elm_s, node_address
|
||||
)
|
||||
node = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct)
|
||||
|
||||
leaf0: int = node["child"]["repr"] # type: ignore[index]
|
||||
if leaf0 == 0:
|
||||
continue
|
||||
|
||||
# print(hex(leaf0))
|
||||
# print("leaf0: ", leaf0)
|
||||
|
||||
# level 1
|
||||
for j in range(max_subkeys):
|
||||
leaf_address = leaf0 + j * rtree_leaf_elm_s.sizeof
|
||||
# leaf = pwndbg.gdblib.memory.poi(rtree_leaf_elm_s, leaf)
|
||||
fetched_struct = pwndbg.gdblib.memory.get_typed_pointer_value(
|
||||
rtree_leaf_elm_s, leaf_address
|
||||
)
|
||||
leaf = pwndbg.gdblib.memory.pack_struct_into_dictionary(fetched_struct)
|
||||
|
||||
if (val := int(leaf["le_bits"]["repr"])) == 0: # type: ignore[index, arg-type]
|
||||
continue
|
||||
|
||||
# print("leaf: ", hex(leaf_address))
|
||||
# print(j, leaf)
|
||||
|
||||
ls = (val << RTREE_NHIB) & ((2**64) - 1)
|
||||
ptr = ((ls >> RTREE_NHIB) >> 1) << 1
|
||||
|
||||
if ptr == 0 or ptr == last_addr:
|
||||
continue
|
||||
|
||||
last_addr = ptr
|
||||
|
||||
extent = Extent(ptr)
|
||||
|
||||
if extent.extent_address in extent_addresses:
|
||||
continue
|
||||
|
||||
extent_addresses.append(extent.extent_address)
|
||||
|
||||
# during initializations, addresses may get some alignment
|
||||
# lets check if size makes sense, otherwise do page alignment and check if again
|
||||
# TODO: better way to do this
|
||||
extent_tmp = extent
|
||||
if extent.size == 0:
|
||||
ptr = RTree.__alignment_addr2base(int(ptr))
|
||||
extent_tmp = Extent(ptr)
|
||||
if extent_tmp.size != 0:
|
||||
self._extents.append(extent_tmp)
|
||||
continue
|
||||
|
||||
self._extents.append(extent_tmp)
|
||||
|
||||
except gdb.MemoryError:
|
||||
pass
|
||||
|
||||
return self._extents
|
||||
|
||||
|
||||
class Arena:
|
||||
"""
|
||||
Some notes:
|
||||
- Huge allocation should not come from arena 0
|
||||
"""
|
||||
|
||||
def __init__(self, addr: int) -> None:
|
||||
self._addr = addr
|
||||
|
||||
self._Value = pwndbg.gdblib.memory.fetch_struct_as_dictionary("arena_s", self._addr)
|
||||
|
||||
self._nbins = None
|
||||
self._slabs = None
|
||||
self._bins = None
|
||||
|
||||
@property
|
||||
def slabs(self):
|
||||
if self._bins is None:
|
||||
self._bins = []
|
||||
try:
|
||||
# TODO: verify this variable
|
||||
self._nbins = gdb.parse_and_eval("nbins_total").cast(
|
||||
gdb.lookup_type("unsigned int")
|
||||
)
|
||||
|
||||
bins_addr = int(self._Value["bins"]["address"]) # type: ignore[index, arg-type]
|
||||
bin_s = pwndbg.gdblib.typeinfo.load("struct bin_s")
|
||||
for i in range(self._nbins):
|
||||
current_bin_addr = int(bins_addr) + i * bin_s.sizeof
|
||||
bin = pwndbg.gdblib.memory.poi(bin_s, current_bin_addr)
|
||||
self._slabs.append(bin)
|
||||
|
||||
except gdb.MemoryError:
|
||||
pass
|
||||
|
||||
return self._slabs
|
||||
|
||||
|
||||
class Extent:
|
||||
"""
|
||||
Concept of extent (edata) is similar to chunk in glibc malloc but allocation algorithm differs a lot.
|
||||
- Extents are used to manage memory blocks (including jemalloc metadata) where extents sizes can vary but each block is always a multiple of the page size.
|
||||
- jemalloc will either allocate one large class request or multiple small class request (called slab) depending on request size.
|
||||
- Unlike chunks in glibc malloc, extents are not doubly linked list but are managed using rtree.
|
||||
- This tree is mostly used during deallocation to find the extent belonging to a pointer that is being freed.
|
||||
- Extents are also not stored as a header structure but externally (therefore extent metadata and actually mapped data may be very far apart).
|
||||
"""
|
||||
|
||||
def __init__(self, addr: int) -> None:
|
||||
self._addr = addr
|
||||
|
||||
# fetch_struct_as_dictionary does not support union currently
|
||||
edata_s = pwndbg.gdblib.typeinfo.load("struct edata_s")
|
||||
self._Value = gdb.Value(self._addr).cast(edata_s.pointer()).dereference()
|
||||
|
||||
self._bitfields = None
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
"""
|
||||
May be larger in case of large size class allocation when cache_oblivious is enabled.
|
||||
"""
|
||||
# return self._Value["e_size_esn"]
|
||||
return (int(self._Value["e_size_esn"]) >> LG_PAGE) << LG_PAGE
|
||||
|
||||
@property
|
||||
def extent_address(self):
|
||||
"""
|
||||
Address of the extent data structure (not the actual memory).
|
||||
"""
|
||||
return self._addr
|
||||
|
||||
@property
|
||||
def allocated_address(self):
|
||||
"""
|
||||
Starting address of allocated memory
|
||||
cache-oblivious large allocation alignment:
|
||||
When a large class allocation is made, jemalloc selects the closest size class that can fit the request and allocates that size + 4 KiB (0x1000).
|
||||
However, the pointer returned to user is randomized between the 'base' and 'base + 4 KiB' (0x1000) range.
|
||||
Source code: https://github.com/jemalloc/jemalloc/blob/a25b9b8ba91881964be3083db349991bbbbf1661/include/jemalloc/internal/arena_inlines_b.h#L505
|
||||
"""
|
||||
return self._Value["e_addr"]
|
||||
|
||||
@property
|
||||
def bsize(self):
|
||||
return self._Value["e_bsize"]
|
||||
|
||||
@property
|
||||
def bits(self):
|
||||
return self._Value["e_bits"]
|
||||
|
||||
@property
|
||||
def bitfields(self):
|
||||
"""
|
||||
Extract bitfields
|
||||
|
||||
arena_ind: Arena from which this extent came, or all 1 bits if unassociated.
|
||||
slab: The slab flag indicates whether the extent is used for a slab of small regions. This helps differentiate small size classes, and it indicates whether interior pointers can be looked up via iealloc().
|
||||
committed: The committed flag indicates whether physical memory is committed to the extent, whether explicitly or implicitly as on a system that overcommits and satisfies physical memory needs on demand via soft page faults.
|
||||
pai: The pai flag is an extent_pai_t.
|
||||
zeroed: The zeroed flag is used by extent recycling code to track whether memory is zero-filled.
|
||||
guarded: The guarded flag is used by the sanitizer to track whether the extent has page guards around it.
|
||||
state: The state flag is an extent_state_t.
|
||||
szind: The szind flag indicates usable size class index for allocations residing in this extent, regardless of whether the extent is a slab. Extent size and usable size often differ even for non-slabs, either due to sz_large_pad or promotion of sampled small regions.
|
||||
nfree: Number of free regions in slab.
|
||||
bin_shard: The shard of the bin from which this extent came.
|
||||
"""
|
||||
|
||||
if self._bitfields is None:
|
||||
self._bitfields = {
|
||||
"arena_ind": (self.bits & EDATA_BITS_ARENA_MASK) >> EDATA_BITS_ARENA_SHIFT,
|
||||
"slab": (self.bits & EDATA_BITS_SLAB_MASK) >> EDATA_BITS_SLAB_SHIFT,
|
||||
"committed": (self.bits & EDATA_BITS_COMMITTED_MASK) >> EDATA_BITS_COMMITTED_SHIFT,
|
||||
"pai": (self.bits & EDATA_BITS_PAI_MASK) >> EDATA_BITS_PAI_SHIFT,
|
||||
"zeroed": (self.bits & EDATA_BITS_ZEROED_MASK) >> EDATA_BITS_ZEROED_SHIFT,
|
||||
"guarded": (self.bits & EDATA_BITS_GUARDED_MASK) >> EDATA_BITS_GUARDED_SHIFT,
|
||||
"state": (self.bits & EDATA_BITS_STATE_MASK) >> EDATA_BITS_STATE_SHIFT,
|
||||
"szind": (self.bits & EDATA_BITS_SZIND_MASK) >> EDATA_BITS_SZIND_SHIFT,
|
||||
"nfree": (self.bits & EDATA_BITS_NFREE_MASK) >> EDATA_BITS_NFREE_SHIFT,
|
||||
"bin_shard": (self.bits & EDATA_BITS_BINSHARD_MASK) >> EDATA_BITS_BINSHARD_SHIFT,
|
||||
}
|
||||
|
||||
return self._bitfields
|
||||
|
||||
@property
|
||||
def state_name(self):
|
||||
state_mapping = ["Active", "Dirty", "Muzzy", "Retained"]
|
||||
|
||||
return state_mapping[self.bitfields["state"]]
|
||||
|
||||
@property
|
||||
def has_slab(self):
|
||||
"""
|
||||
Returns True if the extent is used for small size classes.
|
||||
Reference for size in Table 1 at https://jemalloc.net/jemalloc.3.html
|
||||
At time of writing, allocations <= 0x3800 are considered as small allocations and has slabs.
|
||||
"""
|
||||
return self.bitfields["slab"] != 0
|
||||
|
||||
@property
|
||||
def is_free(self):
|
||||
"""
|
||||
Returns True if the extent is free.
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
def pai(self):
|
||||
"""
|
||||
Page Allocator Interface
|
||||
"""
|
||||
if self.bitfields["pai"] == 0:
|
||||
return "PAC" # Page for extent
|
||||
return "HPA" # Huge Page
|
||||
@ -0,0 +1,17 @@
|
||||
#include <stdlib.h>
|
||||
#include <jemalloc/jemalloc.h>
|
||||
|
||||
void break_here(void) {}
|
||||
|
||||
char *ptr = NULL;
|
||||
|
||||
int main(void)
|
||||
{
|
||||
|
||||
ptr = (char *)malloc(2 * sizeof(char));
|
||||
ptr[0] = 'A';
|
||||
ptr[1] = 'B';
|
||||
|
||||
break_here();
|
||||
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
#include <stdlib.h>
|
||||
#include <jemalloc/jemalloc.h>
|
||||
|
||||
void break_here(void) {}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
|
||||
// Allocate a small memory
|
||||
char *ptr = (char *)malloc(2 * sizeof(char));
|
||||
ptr[0] = 'A';
|
||||
ptr[1] = 'B';
|
||||
|
||||
// allocate non small class size memory
|
||||
char *ptr2 = (char *)malloc(30 * 1024);
|
||||
ptr2[0] = 'A';
|
||||
ptr2[1] = 'B';
|
||||
|
||||
break_here();
|
||||
|
||||
}
|
||||
Loading…
Reference in new issue