mallocng: Implement ng-dump command to dump heap state (#3256)

* implement ng-dump

* clarify index

* fix rebase

* add ability for a property to override color

* make color usage more consistent

* handle ANSI in descriptions properly

* add ng-dump test

* finish rename

* add --meta-area flag to ng-dump

* remark on the coloring difference in the command description

* clarify nominal size on freed slots

* port test to lldb

* Update scripts/_docs/gen_docs_generic.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix function name change

* Update pwndbg/lib/pretty_print.py

* Update pwndbg/commands/mallocng.py

* lint

---------

Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
pull/3279/head
k4lizen 3 months ago committed by GitHub
parent 9cf6092414
commit 4506754bbc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -262,6 +262,7 @@
## musl
- [mallocng-dump](musl/mallocng-dump.md) - Dump the mallocng heap.
- [mallocng-explain](musl/mallocng-explain.md) - Gives a quick explanation of musl's mallocng allocator.
- [mallocng-find](musl/mallocng-find.md) - Find slot which contains the given address.
- [mallocng-group](musl/mallocng-group.md) - Print out information about a mallocng group at the given address.

@ -0,0 +1,34 @@
<!-- THIS PART OF THIS FILE IS AUTOGENERATED. DO NOT MODIFY IT. See scripts/generate-docs.sh -->
# mallocng-dump
```text
usage: mallocng-dump [-h] [-ma META_AREA]
```
Dump the mallocng heap.
May produce lots of output.
**Alias:** ng-dump
### Optional arguments
|Short|Long|Help|
| :--- | :--- | :--- |
|-h|--help|show this help message and exit|
|-ma|--meta-area|Dump only the meta area at the provided address.|
### Notes
Since the command may produce lots of output, you may want to pipe it to
less with `| ng-dump | less -R`.
The [index] next to the metas is their index in the doubly linked list
pointed to by ctx.free_meta_head. The [index] next to the slots is
the slot's index inside of its group (thus, these will always be sequential).
Notice that the pointers in the output of this command aren't colored according
to their mapping's color but rather according to the object's allocation status.
Color legend: allocated; freed; available.
<!-- END OF AUTOGENERATED PART. Do not modify this line or the line below, they mark the end of the auto-generated part of the file. If you want to extend the documentation in a way which cannot easily be done by adding to the command help description, write below the following line. -->
<!-- ------------\>8---- ----\>8---- ----\>8------------ -->

@ -6,6 +6,7 @@ https://elixir.bootlin.com/musl/v1.2.5/source/src/malloc/mallocng
from __future__ import annotations
from enum import Enum
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
@ -422,7 +423,7 @@ class Slot:
assert self.reserved_in_header == 7
# It is possible for start[-3] to contain (7<<5),
# but p[-3] shouldn't unless the slot is free.
return -1
self._reserved = -1
return self._reserved
@ -432,8 +433,10 @@ class Slot:
Raises:
pwndbg.dbg_mod.Error: When reading meta fails.
"""
# Special case (probably) freed chunks:
# Special case (probably) freed slots (see Slot.reserved):
if self.reserved == -1:
# Returning the value calculated below would confuse users
# as, semantically, the slot has no size.
return 0
# https://elixir.bootlin.com/musl/v1.2.5/source/src/malloc/mallocng/meta.h#L159
@ -1433,5 +1436,51 @@ class Mallocng(pwndbg.aglib.heap.heap.MemoryAllocator):
else:
return found.start
def get_free_metas(self) -> Dict[int, Tuple[int, Meta]]:
    """
    Get all free metas by traversing the ctx.free_meta_head doubly
    linked list. Map them to their index in the list.

    Raises:
        pwndbg.dbg_mod.Error: If some meta cannot be read or is
            corrupted (this includes a chain that loops without ever
            returning to its head).

    Returns:
        A dictionary that maps: meta address -> (meta index in list, Meta object).
    """
    head_addr = self.ctx.free_meta_head
    if head_addr == 0:
        return {}

    # A free meta gets completely cleared.
    # https://elixir.bootlin.com/musl/v1.2.5/source/src/malloc/mallocng/meta.h#L111
    # Except for the prev/next pointers which add it to the ctx.free_meta_head chain.
    # https://elixir.bootlin.com/musl/v1.2.5/source/src/malloc/mallocng/meta.h#L85
    # We could check for everything, but mallocng doesn't so let's not.
    start_meta = Meta(head_addr)
    meta_dict: Dict[int, Tuple[int, Meta]] = {start_meta.addr: (0, start_meta)}

    idx = 1
    cur_meta = Meta(start_meta.next)
    # The list is circular, so we are done once we arrive back at the head.
    while cur_meta.addr != start_meta.addr:
        if cur_meta.addr in meta_dict:
            # We came back to a meta that is not the head: the chain is
            # corrupted and would otherwise be traversed forever.
            raise pwndbg.dbg_mod.Error(
                f"free meta chain loops back to {cur_meta.addr:#x} "
                f"without reaching its head {start_meta.addr:#x}"
            )

        meta_dict[cur_meta.addr] = (idx, cur_meta)
        idx += 1
        cur_meta = Meta(cur_meta.next)

    return meta_dict
def meta_is_avail(self, addr: int) -> bool:
    """
    Tell whether `addr` lies inside the range of still-available metas,
    i.e. within `ctx.avail_meta_count` metas starting at `ctx.avail_meta`.
    """
    # It seems all available metas are contiguous.
    # https://elixir.bootlin.com/musl/v1.2.5/source/src/malloc/mallocng/malloc.c#L109
    if addr < self.ctx.avail_meta:
        return False

    avail_end = self.ctx.avail_meta + Meta.sizeof() * self.ctx.avail_meta_count
    return addr < avail_end
mallocng = Mallocng()

@ -435,23 +435,39 @@ def smart_dump_slot(
return output
def dump_meta_area(meta_area: mallocng.MetaArea) -> str:
area_range = (
"@ "
+ C.memory.get(meta_area.addr)
+ " - "
+ C.memory.get(meta_area.addr + meta_area.area_size)
)
def dump_meta_area(meta_area: mallocng.MetaArea, coming_from_dump: bool = False) -> str:
if coming_from_dump:
# We don't want users to wonder which colorings in ng-dump are according to
# state (allocated/avail/freed), and which are according to the memory mapping's color,
# so we will just disable address coloring here.
area_range = "@ " + hex(meta_area.addr) + " - " + hex(meta_area.addr + meta_area.area_size)
else:
area_range = (
"@ "
+ C.memory.get(meta_area.addr)
+ " - "
+ C.memory.get(meta_area.addr + meta_area.area_size)
)
pp = PropertyPrinter()
if coming_from_dump:
slots = ""
slots_is_addr = False
# Don't color according to mapping.
next_prop = Property(name="next", value=hex(meta_area.next), value_color_func=C.normal)
else:
slots = meta_area.slots
slots_is_addr = True
next_prop = Property(name="next", value=meta_area.next, is_addr=True)
pp.start_section("meta_area", area_range)
pp.add(
[
Property(name="check", value=meta_area.check),
Property(name="next", value=meta_area.next, is_addr=True),
next_prop,
Property(name="nslots", value=meta_area.nslots),
Property(name="slots", value=meta_area.slots, is_addr=True),
Property(name="slots", value=slots, is_addr=slots_is_addr, extra="array of metas"),
]
)
return pp.dump()
@ -1114,6 +1130,119 @@ def mallocng_visualize_slots(address: int, count: int = default_vis_count):
print("\n".join(out))
# Command-line interface of `mallocng-dump`: one optional flag that restricts
# the dump to a single meta area.
parser = argparse.ArgumentParser(
    description="\nDump the mallocng heap.\nMay produce lots of output.\n"
)
parser.add_argument(
    "-ma",
    "--meta-area",
    type=int,
    help="Dump only the meta area at the provided address.",
)
@pwndbg.commands.Command(
    parser,
    category=CommandCategory.MUSL,
    aliases=["ng-dump"],
    notes=(
        f"""
Since the command may produce lots of output, you may want to pipe it to
less with `| ng-dump | less -R`.
The [index] next to the metas is their index in the doubly linked list
pointed to by ctx.free_meta_head. The [index] next to the slots is
the slot's index inside of its group (thus, these will always be sequential).
Notice that the pointers in the output of this command aren't colored according
to their mapping's color but rather according to the object's allocation status.
Color legend: {C.colorize("allocated", state_alloc_color)}; """
        f'{C.colorize("freed", state_freed_color)}; {C.colorize("available", state_avail_color)}.'
    ),
)
@pwndbg.commands.OnlyWhenRunning
def mallocng_dump(meta_area: Optional[int] = None) -> None:
    # Walk the meta areas (all of them starting at ctx.meta_area_head, or only
    # the one at `meta_area` when given) and print, for each meta area, its
    # metas and, for every allocated meta, the slots of its group. Addresses
    # are colored by allocation state rather than by memory mapping.
    if not ng.init_if_needed():
        print(message.error("Couldn't find the allocator, aborting the command."))
        return

    ctx: mallocng.MallocContext = ng.ctx

    try:
        free_metas = ng.get_free_metas()
    except pwndbg.dbg_mod.Error as e:
        # Best effort: keep dumping, but freed metas can no longer be
        # told apart from allocated ones.
        print(message.error(f"Failed traversing free meta chain. {e}"))
        print(message.error("Meta allocation state may be wrong."))
        free_metas = {}

    meta_padding = " " * 10
    slot_padding = " " * 15

    # `meta_area` is the user-supplied address (if any); keep it under a
    # clearer name and use a separate local for the MetaArea objects.
    specified_meta_area = meta_area

    if specified_meta_area is not None:
        ma_addr = specified_meta_area
    else:
        # Iterate over all meta_areas
        ma_addr = ctx.meta_area_head

    while ma_addr != 0:
        try:
            cur_area = mallocng.MetaArea(ma_addr)
        except pwndbg.dbg_mod.Error as e:
            print(message.error(f"Cannot read meta area @ {ma_addr:#x}: {e}"))
            break

        print(dump_meta_area(cur_area, coming_from_dump=True))

        # Iterate over all metas in this meta_area
        for i in range(cur_area.nslots):
            meta_addr = cur_area.at_index(i)

            if meta_addr in free_metas:
                print(
                    meta_padding
                    + C.colorize(f"{meta_addr:#x} [{free_metas[meta_addr][0]}]", state_freed_color)
                )
                continue

            if ng.meta_is_avail(meta_addr):
                print(meta_padding + C.colorize(f"{meta_addr:#x}", state_avail_color))
                continue

            # Allocated meta: the line is completed below with either the
            # group information or an error message.
            print(meta_padding + C.colorize(f"{meta_addr:#x}", state_alloc_color), end="")

            try:
                meta = mallocng.Meta(meta_addr)
                meta.preload()
                group = mallocng.Group(meta.mem)
                # Load the group (not the meta a second time) so that an
                # unreadable group is reported here instead of blowing up later.
                # NOTE(review): assumes Group exposes preload() like Meta does.
                group.preload()
            except pwndbg.dbg_mod.Error as e:
                # Separate the message from the address printed with end="".
                print(" " + message.error(f"Failed resolving meta / group data ({e}). Skipping.."))
                continue

            print(f" -> group @ {group.addr:#x} (slot size: {meta.stride:#x})")

            # Iterate over all slots in this group
            for idx in range(meta.cnt):
                slot_addr = group.at_index(idx)
                sstate = meta.slotstate_at_index(idx)
                cur_slot_color = get_slot_color(sstate)
                print(
                    slot_padding + C.colorize(f"{slot_addr:#x}", cur_slot_color) + f" [{idx}]"
                )

            print()

        ma_addr = cur_area.next
        print()

        if specified_meta_area is not None:
            # Exit the loop since we're only printing one meta area.
            break
@pwndbg.commands.Command(
"Gives a quick explanation of musl's mallocng allocator.",
category=CommandCategory.MUSL,

@ -24,6 +24,9 @@ class Property:
extra: str | List[str] = ""
is_addr: bool = False
use_hex: bool = True
# Override the PropertyPrinter's color.
name_color_func: Optional[Callable[[str], str]] = None
value_color_func: Optional[Callable[[str], str]] = None
class PropertyPrinter:
@ -84,9 +87,13 @@ class PropertyPrinter:
)
for prop in prop_group:
# The property may override the PropertyPrinter's color functions.
prop_name_cfunc = prop.name_color_func if prop.name_color_func is not None else self.name_color_func
prop_value_cfunc = prop.value_color_func if prop.value_color_func is not None else self.value_color_func
self.text += (
indentation_str
+ color.ljust_colored(self.name_color_func(prop.name) + ":", self.value_offset)
+ color.ljust_colored(prop_name_cfunc(prop.name) + ":", self.value_offset)
+ " "
)
@ -94,11 +101,11 @@ class PropertyPrinter:
base = 16 if prop.use_hex else 10
colored_val = color.memory.get(int(prop.value, base))
else:
colored_val = self.value_color_func(prop.value)
colored_val = prop_value_cfunc(prop.value)
colored_alt_val = ""
if prop.alt_value is not None:
colored_alt_val = " (" + self.value_color_func(prop.alt_value) + ")"
colored_alt_val = f" ({prop_value_cfunc(prop.alt_value)})"
self.text += color.ljust_colored(colored_val + colored_alt_val, self.extra_offset)

@ -15,6 +15,7 @@ from scripts._docs.command_docs_common import ExtractedCommand
from scripts._docs.command_docs_common import category_to_folder_name
from scripts._docs.command_docs_common import extracted_filename
from scripts._docs.gen_docs_generic import ALL_DEBUGGERS
from scripts._docs.gen_docs_generic import strip_ansi_color
from scripts._docs.gen_docs_generic import verify_existence
AUTOGEN_END_MARKER1 = "<!-- END OF AUTOGENERATED PART. Do not modify this line or the line below, they mark the end of the auto-generated part of the file. If you want to extend the documentation in a way which cannot easily be done by adding to the command help description, write below the following line. -->\n"
@ -76,7 +77,7 @@ def get_markdown_body(cmd: ExtractedCommand) -> str:
if cmd.pure_epilog:
mdFile.write("### Extra\n" + cmd.pure_epilog + "\n")
return "\n" + mdFile.get_md_text().strip() + "\n"
return "\n" + strip_ansi_color(mdFile.get_md_text().strip()) + "\n"
def convert_all_to_markdown(

@ -13,6 +13,7 @@ from scripts._docs.configuration_docs_common import BASE_PATH
from scripts._docs.configuration_docs_common import ExtractedParam
from scripts._docs.configuration_docs_common import extracted_filename
from scripts._docs.gen_docs_generic import ALL_DEBUGGERS
from scripts._docs.gen_docs_generic import strip_ansi_color
from scripts._docs.gen_docs_generic import update_files_simple
from scripts._docs.gen_docs_generic import verify_existence
from scripts._docs.gen_docs_generic import verify_files_simple
@ -80,7 +81,7 @@ def convert_to_markdown(scope: str, debugger_to_params: Dict[str, list[Extracted
autogen_warning = (
"<!-- THIS WHOLE FILE IS AUTOGENERATED. DO NOT MODIFY IT. See scripts/generate-docs.sh -->"
)
return autogen_warning + "\n" + mdFile.get_md_text()
return autogen_warning + "\n" + strip_ansi_color(mdFile.get_md_text())
def check_index(num_scopes: int):

@ -22,6 +22,7 @@ from scripts._docs.function_docs_common import BASE_PATH
from scripts._docs.function_docs_common import ExtractedFunction
from scripts._docs.function_docs_common import extracted_filename
from scripts._docs.gen_docs_generic import ALL_DEBUGGERS
from scripts._docs.gen_docs_generic import strip_ansi_color
from scripts._docs.gen_docs_generic import update_files_simple
from scripts._docs.gen_docs_generic import verify_existence
from scripts._docs.gen_docs_generic import verify_files_simple
@ -138,7 +139,9 @@ def convert_to_markdown(extracted: list[Tuple[str, list[ExtractedFunction]]]) ->
autogen_warning = (
"<!-- THIS WHOLE FILE IS AUTOGENERATED. DO NOT MODIFY IT. See scripts/generate-docs.sh -->"
)
markdowned[INDEX_PATH] = hide_nav + autogen_warning + "\n" + mdFile.get_md_text()
markdowned[INDEX_PATH] = (
hide_nav + autogen_warning + "\n" + strip_ansi_color(mdFile.get_md_text())
)
return markdowned

@ -2,6 +2,7 @@
from __future__ import annotations
import os
import re
from typing import Dict
from typing import Tuple
@ -91,3 +92,9 @@ def get_debugger() -> str:
assert debugger and "Use the PWNDBG_DOCGEN_DBGNAME env variable."
assert debugger in ALL_DEBUGGERS and "Debugger not defined in the ALL_DEBUGGERS array."
return debugger
def strip_ansi_color(x: str) -> str:
    """
    Remove ANSI color (SGR) escape sequences from `x`.

    In case some description etc. contains ANSI coloring, we need to
    take that out since we do not render it properly on the website.

    Args:
        x: Text that may contain `ESC [ <params> m` sequences.

    Returns:
        `x` with every such sequence removed; all other text is untouched.
    """
    # The parameter list may be empty: a bare `ESC[m` is a valid reset,
    # so match zero or more digits/semicolons rather than one or more.
    return re.sub(r"\x1b\[[\d;]*m", "", x)

@ -482,3 +482,21 @@ async def test_mallocng_vis(ctrl: Controller, binary: str):
# (Now the outer group will be printed.)
vis_out3 = color.strip(await ctrl.execute_and_capture("ng-vis buffer1")).splitlines()
assert len(vis_out3) > len(vis_out)
@pwndbg_test
@pytest.mark.parametrize(
    "binary", [HEAP_MALLOCNG_DYN, HEAP_MALLOCNG_STATIC], ids=["dynamic", "static"]
)
async def test_mallocng_dump(ctrl: Controller, binary: str):
    """
    Smoke-test `ng-dump`: after the test binary has made its allocations the
    dump must list a meta area, the expected groups and their slot indices.
    """
    await launch_to(ctrl, binary, "break_here")
    await ctrl.finish()

    # Strip the ANSI coloring (as the other mallocng tests do) so the substring
    # checks below do not depend on where color codes are inserted.
    dump_out = color.strip(await ctrl.execute_and_capture("ng-dump"))

    assert "meta_area" in dump_out
    assert "group @" in dump_out
    assert "(slot size: 0x30)" in dump_out  # buffer{1,2,3}
    assert "(slot size: 0x2a0)" in dump_out  # buffer{4,5}

    # 10 slots in the buffer{1,2,3} group.
    for idx in range(10):
        assert f"[{idx}]" in dump_out

Loading…
Cancel
Save