Refine `vmmap` output on Darwin (#3255)

* Refine `vmmap` based on shared cache information * Remove support for partial overlaps of mappings with the shared cache * Coalesce contiguous vmmap ranges * Omit Shared Cache entries in `vmmap` by default * Remove nesting in _refine_memory_map * Document differences between `aglib` and Debugger API `vmmap` * Change display format
3 months ago · abf873b0b3
parent 4506754bbc
commit abf873b0b3
9 changed files with 218 additions and 16 deletions
--- a/docs/commands/memory/vmmap.md
+++ b/docs/commands/memory/vmmap.md
@ -2,8 +2,8 @@
 # vmmap

 ```text
-usage: vmmap [-h] [-w] [-x] [-A LINES_AFTER] [-B LINES_BEFORE] [-C CONTEXT]
-             [--gaps]
+usage: vmmap [-h] [-w] [-x] [-s] [-A LINES_AFTER] [-B LINES_BEFORE]
+             [-C CONTEXT] [--gaps]
             [gdbval_or_str]

 ```
@ -39,6 +39,7 @@ Memory pages can also be added manually with the use of vmmap-add, vmmap-clear a
 |-h|--help|show this help message and exit|
 |-w|--writable|Display writable maps only|
 |-x|--executable|Display executable maps only|
+|-s|--expand-shared-cache|Expand all entries in the DYLD Shared Cache (Darwin only)|
 |-A|--lines-after|Number of pages to display after result (default: 1)|
 |-B|--lines-before|Number of pages to display before result (default: 1)|
 |-C|--context|Number of pages to display around the result|
--- a/pwndbg/aglib/macho.py
+++ b/pwndbg/aglib/macho.py
@ -422,10 +422,14 @@ class DyldSharedCache:
    the size of the struct to denote its version.
    """

+    slide: int
+    "The slide value of the DyLD Shared Cache, in bytes."
+
    def __init__(self, addr: int):
        self.addr = addr

        # Preload a few values, to speed things up later.
+        self.slide = self._slide()
        images_offset = 0x18 if self._header_size() <= 0x1C4 else 0x1C0
        self._images_base = self.addr + pwndbg.aglib.memory.u32(self.addr + images_offset)
        self.image_count = pwndbg.aglib.memory.u32(self.addr + images_offset + 4)
@ -514,8 +518,7 @@ class DyldSharedCache:

            return end - start

-    @property
-    def slide(self) -> int:
+    def _slide(self) -> int:
        "The slide value of the DyLD Shared Cache, in bytes."
        mapping_ptr = self.base + self._header_size()
        mapping_base = pwndbg.aglib.memory.u64(mapping_ptr)
@ -552,7 +555,7 @@ class DyldSharedCache:
    def image_base(self, index: int):
        assert self.image_count > index

-        return pwndbg.aglib.memory.u64(self._images_base + index * 0x20)
+        return pwndbg.aglib.memory.u64(self._images_base + index * 0x20) + self.slide

    def image_name(self, index: int):
        assert self.image_count > index
@ -577,7 +580,7 @@ class DyldSharedCache:
                pwndbg.aglib.memory.string(
                    self.addr + struct.unpack("<I", data[base + 0x18 : base + 0x1C])[0]
                ),
-                struct.unpack("<Q", data[base : base + 8])[0],
+                struct.unpack("<Q", data[base : base + 8])[0] + self.slide,
            )

    @property
--- a/pwndbg/aglib/vmmap.py
+++ b/pwndbg/aglib/vmmap.py
@ -1,12 +1,16 @@
 from __future__ import annotations

+import bisect
 from typing import Tuple

 import pwndbg
+import pwndbg.aglib.arch
 import pwndbg.aglib.vmmap_custom
 import pwndbg.lib.cache
 import pwndbg.lib.memory
 from pwndbg.dbg import MemoryMap
+from pwndbg.lib.arch import Platform
+from pwndbg.lib.memory import Page

 pwndbg.config.add_param(
    "vmmap-prefer-relpaths",
@ -16,9 +20,85 @@ pwndbg.config.add_param(
 )


+def _refine_memory_map(pages: MemoryMap) -> MemoryMap:
+    """
+    Tag the mappings that belong to the Darwin Shared Cache.
+
+    On platforms other than Darwin, or when no shared cache is available,
+    `pages` is returned untouched. Otherwise, every range that lies inside
+    the shared cache is split along image boundaries, and each piece becomes
+    a new `Page` named after the image it belongs to (or `[SharedCacheHeader]`
+    for the part that precedes the first image), with its offset relative to
+    the start of the shared cache and `in_darwin_shared_cache` set. Ranges
+    outside of the shared cache are passed through as they are.
+
+    Returns a new memory map of the same type as `pages`.
+
+    Raises `AssertionError` if a range only partially overlaps the shared
+    cache.
+    """
+    if pwndbg.aglib.arch.platform != Platform.DARWIN:
+        return pages
+
+    shared_cache = pwndbg.aglib.macho.shared_cache()
+    if shared_cache is None:
+        return pages
+
+    # Darwin platforms use something called the Shared Cache for system
+    # libraries. Debuggers may report mapping ranges that belong to the
+    # shared cache in many ways, but we would like to tag those with a
+    # little more information.
+    final_pages = []
+
+    shared_cache_start = shared_cache.base
+    shared_cache_end = shared_cache_start + shared_cache.size
+
+    # NOTE(review): the bisect below relies on `images_sorted` being ordered
+    # by base address - confirm against `DyldSharedCache.images_sorted`.
+    images = list(shared_cache.images_sorted)
+    images_base = [image[1] for image in images]
+
+    for page in pages.ranges():
+        # The end of a range is exclusive, so a mapping that merely touches
+        # the shared cache on either side does not overlap with it.
+        if page.end <= shared_cache_start or page.start >= shared_cache_end:
+            final_pages.append(page)
+            continue
+
+        # We do not support partial overlaps between other mappings and the
+        # shared cache.
+        #
+        # While conceptually there's nothing stopping these from happening,
+        # if we ever encounter such a situation, it likely means that we
+        # either got something wrong, or that Darwin/LLDB has changed in
+        # a way that we are not able to handle gracefully.
+        #
+        assert page.start >= shared_cache_start and page.end <= shared_cache_end
+
+        # Index of the first image that starts strictly after `page.start`;
+        # the image that contains `page.start`, if any, is the one before it.
+        one_past_index = bisect.bisect_right(images_base, page.start)
+        curr_base = page.start
+
+        while one_past_index <= len(images):
+            if one_past_index == 0:
+                # Indicates that this mapping is not part of any image, but
+                # still part of the shared cache itself. Use a special name
+                # for it.
+                objfile = "[SharedCacheHeader]"
+            elif images_base[one_past_index - 1] >= page.end:
+                # The remaining images all start past the end of this range.
+                break
+            else:
+                # Name this mapping after the image it belongs to.
+                objfile = images[one_past_index - 1][0].decode("ascii")
+                curr_base = max(images_base[one_past_index - 1], page.start)
+
+            # This piece runs until the next image starts, or until the end
+            # of the range, whichever comes first.
+            if one_past_index == len(images):
+                end = page.end
+            else:
+                end = min(page.end, images_base[one_past_index])
+
+            final_pages.append(
+                Page(
+                    curr_base,
+                    end - curr_base,
+                    page.flags,
+                    curr_base - shared_cache_start,
+                    objfile,
+                    in_darwin_shared_cache=True,
+                )
+            )
+
+            one_past_index += 1
+
+    return type(pages)(final_pages)
+
+
@pwndbg.lib.cache.cache_until("start", "stop")
 def get_memory_map() -> MemoryMap:
-    return pwndbg.dbg.selected_inferior().vmmap()
+    return _refine_memory_map(pwndbg.dbg.selected_inferior().vmmap())


@pwndbg.lib.cache.cache_until("start", "stop")
--- a/pwndbg/commands/rop.py
+++ b/pwndbg/commands/rop.py
@ -167,7 +167,7 @@ def iterate_over_pages(mem_limit: int) -> Iterator[Tuple[str, pwndbg.lib.memory.
        return

    proc = pwndbg.dbg.selected_inferior()
-    for page in proc.vmmap().ranges():
+    for page in pwndbg.aglib.vmmap.get_memory_map().ranges():
        if not page.execute:
            continue

--- a/pwndbg/commands/vmmap.py
+++ b/pwndbg/commands/vmmap.py
@ -178,6 +178,12 @@ parser.add_argument(
 )
 parser.add_argument("-w", "--writable", action="store_true", help="Display writable maps only")
 parser.add_argument("-x", "--executable", action="store_true", help="Display executable maps only")
+parser.add_argument(
+    "-s",
+    "--expand-shared-cache",
+    action="store_true",
+    help="Expand all entries in the DYLD Shared Cache (Darwin only)",
+)
 parser.add_argument(
    "-A", "--lines-after", type=int, help="Number of pages to display after result", default=1
 )
@ -206,6 +212,7 @@ def vmmap(
    lines_before=1,
    context=None,
    gaps=False,
+    expand_shared_cache=False,
 ) -> None:
    lookaround_lines_limit = 64

@ -217,7 +224,7 @@ def vmmap(
        lines_before = min(lookaround_lines_limit, lines_before)

    # All displayed pages, including lines after and lines before
-    vmmap = pwndbg.dbg.selected_inferior().vmmap()
+    vmmap = pwndbg.aglib.vmmap.get_memory_map()
    total_pages = vmmap.ranges()

    # Filtered memory pages, indicated by a backtrace arrow in results
@ -225,6 +232,9 @@ def vmmap(

    # Only filter when -A and -B arguments are valid
    if gdbval_or_str and lines_after >= 0 and lines_before >= 0:
+        # Always expand shared cache on detailed output.
+        expand_shared_cache = True
+
        # Find matching page in memory
        filtered_pages = list(filter(pages_filter(gdbval_or_str), total_pages))
        pages_to_display = []
@ -264,10 +274,40 @@ def vmmap(
    print(M.legend())
    print_vmmap_table_header()

+    shared_cache_first = None
+    shared_cache_last = None
+    shared_cache_collapsed = 0
+
+    def flush_shared_cache_info():
+        nonlocal shared_cache_first
+        nonlocal shared_cache_last
+        if shared_cache_last is not None:
+            print(
+                pwndbg.lib.memory.format_address(
+                    shared_cache_first.start,
+                    shared_cache_last.end - shared_cache_first.start,
+                    "---p",
+                    shared_cache_first.offset,
+                    "[DYLD Shared Cache]",
+                )
+            )
+
+            shared_cache_first = None
+            shared_cache_last = None
+
    for page in total_pages:
        if (executable and not page.execute) or (writable and not page.write):
            continue

+        # Omit ranges from the shared cache if requested.
+        if page is not None and page.in_darwin_shared_cache and not expand_shared_cache:
+            if shared_cache_first is None:
+                shared_cache_first = page
+            shared_cache_last = page
+            shared_cache_collapsed += 1
+            continue
+        flush_shared_cache_info()
+
        backtrace_prefix = None
        display_text = str(page)

@ -281,6 +321,13 @@ def vmmap(

        print(M.get(page.vaddr, text=display_text, prefix=backtrace_prefix))

+    flush_shared_cache_info()
+    if shared_cache_collapsed > 0:
+        print(
+            f"[Omitted {shared_cache_collapsed} {'entry' if shared_cache_collapsed == 1 else 'entries'} from the DYLD Shared Cache in total, use '-s' to expand]"
+        )
+        shared_cache_collapsed = 0
+
    if vmmap.is_qemu():
        print(
            "\n[QEMU <8.1 target detected - vmmap result might not be accurate; see `help vmmap`]"
--- a/pwndbg/dbg/init.py
+++ b/pwndbg/dbg/init.py
@ -395,7 +395,13 @@ class Process:

    def vmmap(self) -> MemoryMap:
        """
-        Returns the virtual memory map of this process.
+        Returns the virtual memory map of this process, as seen by the debugger.
+
+        Generally, one should prefer `pwndbg.aglib.vmmap.get()` over this
+        function, as this passes the raw information from the debugger more or
+        less straight through, without applying more general Pwndbg enhancements
+        to the memory map. This is the lower-level functionality on top of which
+        the function in `aglib` is implemented.
        """
        raise NotImplementedError()

--- a/pwndbg/dbg/lldb/init.py
+++ b/pwndbg/dbg/lldb/init.py
@ -887,6 +887,54 @@ class LLDBProcess(pwndbg.dbg_mod.Process):

        return pages

+    def _process_vmmap_pages(
+        self, pages: List[pwndbg.lib.memory.Page]
+    ) -> List[pwndbg.lib.memory.Page]:
+        """
+        Coalesce sequential, contiguous and otherwise identical ranges.
+
+        Two neighboring entries of `pages` are joined into a single range when
+        the first one ends exactly where the second one starts, and both have
+        the same flags, object file and shared cache membership. The joined
+        range keeps the start and offset of the first entry of the run.
+        Entries that can not be joined are passed through unchanged.
+
+        `pages` is expected to already be ordered by address, as only
+        neighboring entries are compared. Returns a new list; `pages` itself
+        is not modified.
+        """
+        # LLDB - particularly in macOS - may yield multiple ranges that describe
+        # contiguous sequential regions of virtual memory, but are otherwise
+        # identical. This seems to happen because LLDB internally distinguishes
+        # between different Mach-O sections. That information, however, is not
+        # made reliably available to us.
+        final_pages: List[pwndbg.lib.memory.Page] = []
+        for page in pages:
+            prev = final_pages[-1] if final_pages else None
+            mergeable = (
+                prev is not None
+                and prev.end == page.start
+                and prev.flags == page.flags
+                and prev.objfile == page.objfile
+                and prev.in_darwin_shared_cache == page.in_darwin_shared_cache
+            )
+
+            if not mergeable:
+                final_pages.append(page)
+                continue
+
+            # Grow the last emitted range in place so that it also covers
+            # `page`. As every run lives in `final_pages` from its first
+            # entry onwards, there is no pending tail to lose once the loop
+            # is over.
+            final_pages[-1] = pwndbg.lib.memory.Page(
+                prev.start,
+                page.end - prev.start,
+                prev.flags,
+                prev.offset,
+                prev.objfile,
+                prev.in_darwin_shared_cache,
+            )
+
+        return final_pages
+
    @override
    def vmmap(self) -> pwndbg.dbg_mod.MemoryMap:
        from pwndbg.aglib.commpage import get_commpage_mappings
@ -896,7 +944,7 @@ class LLDBProcess(pwndbg.dbg_mod.Process):
            pages.extend(get_commpage_mappings())
            pages.sort()

-            return LLDBMemoryMap(pages)
+            return LLDBMemoryMap(self._process_vmmap_pages(pages))

        from pwndbg.aglib.kernel.vmmap import kernel_vmmap
        from pwndbg.aglib.vmmap_custom import get_custom_pages
@ -905,7 +953,8 @@ class LLDBProcess(pwndbg.dbg_mod.Process):
        pages.extend(kernel_vmmap())
        pages.extend(get_custom_pages())
        pages.sort()
-        return LLDBMemoryMap(pages)
+
+        return LLDBMemoryMap(self._process_vmmap_pages(pages))

    def find_largest_range_len(
        self, min_search: int, max_search: int, test: Callable[[int], bool]
--- a/pwndbg/lib/memory.py
+++ b/pwndbg/lib/memory.py
@ -29,6 +29,13 @@ def round_up(address: int, align: int) -> int:
    return (address + (align - 1)) & (~(align - 1))


+def format_address(vaddr: int, memsz: int, permstr: str, offset: int, objfile: str | None = None) -> str:
+    """
+    Format one row of the memory map table as a string.
+
+    The row holds the start address, the end address (`vaddr + memsz`), the
+    permission string, the size, the offset and the object file name, in
+    this order and separated by single spaces. All numbers are printed in
+    hexadecimal, and a missing or empty `objfile` is printed as an empty
+    string.
+    """

+    # Pad both addresses to the same width: the `0x` prefix plus two digits
+    # per unit of `ptrsize` (presumably the pointer size in bytes).
+    width = 2 + 2 * pwndbg.aglib.arch.ptrsize
+    return f"{vaddr:#{width}x} {vaddr + memsz:#{width}x} {permstr} {memsz:8x} {offset:7x} {objfile or ''}"
+
+
 align_down = round_down
 align_up = round_up

@ -67,12 +74,21 @@ class Page:
    - A path to a file, such as `/usr/lib/libc.so.6`
    """

-    def __init__(self, start: int, size: int, flags: int, offset: int, objfile: str = "") -> None:
+    in_darwin_shared_cache: bool
+    """
+    Whether this mapping is part of the Darwin Shared Cache.
+
+    This is an interesting property to know, as these entries may not be useful
+    to us at all times, and having an easy way to filter them out is helpful.
+    """
+
+    def __init__(self, start: int, size: int, flags: int, offset: int, objfile: str = "", in_darwin_shared_cache: bool = False) -> None:
        self.vaddr = start
        self.memsz = size
        self.flags = flags
        self.offset = offset
        self.objfile = objfile
+        self.in_darwin_shared_cache = in_darwin_shared_cache

        # if self.rwx:
        # self.flags = self.flags ^ 1
@ -147,8 +163,8 @@ class Page:
            objfile = self.objfile if len(rel) > len(self.objfile) else rel
        else:
            objfile = self.objfile
-        width = 2 + 2 * pwndbg.aglib.arch.ptrsize
-        return f"{self.vaddr:#{width}x} {self.vaddr + self.memsz:#{width}x} {self.permstr} {self.memsz:8x} {self.offset:7x} {objfile or ''}"
+        
+        return format_address(self.vaddr, self.memsz, self.permstr, self.offset, objfile=objfile)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.__str__()!r})"
--- a/tests/library/qemu_user/tests/test_aarch64.py
+++ b/tests/library/qemu_user/tests/test_aarch64.py
@ -819,7 +819,7 @@ def test_memory_read_error_handling(qemu_assembly_run):
    # Find the first memory page where there is a gap after it
    stack_end_addr = -1
    page_prev = None
-    for page in pwndbg.dbg.selected_inferior().vmmap().ranges():
+    for page in pwndbg.aglib.vmmap.get_memory_map().ranges():
        if page_prev is not None and page_prev.end != page.start:
            stack_end_addr = page_prev.end
            break