def _uleb128(ptr: int) -> Tuple[int, int]:
    """
    Decode the ULEB128 value stored at address `ptr`.

    Returns a tuple of (value, length), where `value` is the decoded number
    and `length` is how many bytes the whole encoding occupies.
    """
    value = 0
    shift = 0
    length = 0

    while True:
        byte = pwndbg.aglib.memory.u8(ptr + length)
        length += 1

        # The low seven bits of each byte are payload, least significant
        # group first.
        value |= (byte & 0x7F) << shift
        shift += 7

        if not byte & 0x80:
            # A clear high bit marks the terminator byte.
            return value, length
If it returns True, that edge + will be explored, otherwise, the edge will be ignored. + + At every node, this function will call `nodesel` with the currently + accumulated name. If it returns True, the node will be yielded, + otherwise, it will be ignored. + + Yielded node information consists of a tuple of (name, ptr, length), + where `name` is the name of the node, `ptr` is the address of the first + byte of its associated data, and `length` is the length of its + associated data, in bytes. + """ + base = self._ptr + offset + + node_data_len, node_data_len_len = _uleb128(base) + if node_data_len != 0 and nodesel(acc): + # The user selected this node, stop the walk here. + yield acc, base + node_data_len_len, node_data_len + + cursor = base + node_data_len_len + node_data_len + + # The number of children is NOT a ULEB128. + children = pwndbg.aglib.memory.u8(cursor) + cursor += 1 + + for _ in range(children): + name = pwndbg.aglib.memory.string(cursor) + cursor += len(name) + 1 + + child_offset, child_offset_len = _uleb128(cursor) + cursor += child_offset_len + + if edgesel(acc, name): + yield from self._walk(child_offset, acc + name, edgesel, nodesel) + + # The cursor is already at the next child. + + def _get_raw(self, name: bytes) -> Tuple[bytes, int, int] | None: + """ + Get the data associated with the node of given name, if it exists. + """ + + def nodesel(candidate: bytes) -> bool: + return candidate == name + + def edgesel(acc: bytes, candidate: bytes) -> bool: + return name[len(acc) :].startswith(candidate) + + return next(self._walk(0, b"", edgesel, nodesel), None) + + def _entries_raw(self) -> Generator[Tuple[bytes, int, int]]: + """ + List all the entries in the trie, along with their associated data. + """ + yield from self._walk(0, b"", lambda _acc, _candidate: True, lambda _candidate: True) + + def keys(self) -> Generator[bytes]: + """ + List the name of all nodes in the trie. 
def get(self, name: bytes) -> T | None:
    """
    Get the data associated with the node of given name, if it exists.

    Returns the node's data converted through the type function supplied at
    construction time, or None when the trie has no node called `name`.
    """
    raw = self._get_raw(name)
    if raw is None:
        # `_get_raw` reports a missing node as None; unpacking it directly
        # would raise a TypeError instead of honoring the `T | None` contract.
        return None

    _, ptr, size = raw
    return self._ty(ptr, size)
+ + [1]: https://burtleburtle.net/bob/hash/doobs.html + [2]: https://burtleburtle.net/bob/c/lookup8.c + """ + blob = bytearray(blob) + orig_len = len(blob) + + a = level + b = level + c = 0x9E3779B97F4A7C13 + + padded = False + while True: + blob_len = len(blob) + if blob_len == 0: + if not padded: + # We need to mix one more time if the blob was not padded. + c += orig_len + a, b, c = _mix64(a, b, c) + + break + + if blob_len < 24: + # If the length of the blob is not divisible by 24, we pad it out + # with zeroes until it is. + # + # We must be careful so as to always insert a zero at index 16, + # which corresponds with the reservation of the length in `c` in the + # original C code. + c += orig_len + + blob.extend(b"\0" * (23 - blob_len)) + blob.insert(16, 0) + + padded = True + + a += ( + blob[0] + + (blob[1] << 8) + + (blob[2] << 16) + + (blob[3] << 24) + + (blob[4] << 32) + + (blob[5] << 40) + + (blob[6] << 48) + + (blob[7] << 56) + ) + b += ( + blob[8] + + (blob[9] << 8) + + (blob[10] << 16) + + (blob[11] << 24) + + (blob[12] << 32) + + (blob[13] << 40) + + (blob[14] << 48) + + (blob[15] << 56) + ) + c += ( + blob[16] + + (blob[17] << 8) + + (blob[18] << 16) + + (blob[19] << 24) + + (blob[20] << 32) + + (blob[21] << 40) + + (blob[22] << 48) + + (blob[23] << 56) + ) + + a %= 0x10000000000000000 + b %= 0x10000000000000000 + c %= 0x10000000000000000 + + a, b, c = _mix64(a, b, c) + + blob = blob[24:] + + return c + + +def _mix64(a: int, b: int, c: int) -> tuple[int, int, int]: + """ + Mix 3 64-bit values reversibly. + + This function is part of the Python port of Bob Jenkin's hash algorithm, as + detailed in `_lookup8`. 
+ """ + a -= b + a -= c + a ^= c >> 43 + a %= 0x10000000000000000 + + b -= c + b -= a + b ^= a << 9 + b %= 0x10000000000000000 + + c -= a + c -= b + c ^= b >> 8 + c %= 0x10000000000000000 + + a -= b + a -= c + a ^= c >> 38 + a %= 0x10000000000000000 + + b -= c + b -= a + b ^= a << 23 + b %= 0x10000000000000000 + + c -= a + c -= b + c ^= b >> 5 + c %= 0x10000000000000000 + + a -= b + a -= c + a ^= c >> 35 + a %= 0x10000000000000000 + + b -= c + b -= a + b ^= a << 49 + b %= 0x10000000000000000 + + c -= a + c -= b + c ^= b >> 11 + c %= 0x10000000000000000 + + a -= b + a -= c + a ^= c >> 12 + a %= 0x10000000000000000 + + b -= c + b -= a + b ^= a << 18 + b %= 0x10000000000000000 + + c -= a + c -= b + c ^= b >> 22 + c %= 0x10000000000000000 + + return a, b, c + + +class DyldSharedCacheHashSet: + """ + A hash set from the DyLD Shared Cache. + + The DyLD Shared Cache uses hash sets in all structures related to Objective-C + Optimization. This class is an interface to them. + """ + + def __init__(self, ptr: int): + self._ptr = ptr + + self.capacity = pwndbg.aglib.memory.u32(self._ptr + 0x04) + self.shift = pwndbg.aglib.memory.u32(self._ptr + 0x0C) + self.mask = pwndbg.aglib.memory.u32(self._ptr + 0x10) + self.salt = pwndbg.aglib.memory.u64(self._ptr + 0x18) + + # Mask must always be one minus a power of two. If this fails, it hints + # that we loaded from an invalid address. + assert (self.mask + 1).bit_count() == 1 + + # Name the offsets of elements in the dynamically-sized portion of the + # structure (which starts at 0x420). + self._checkbytes_offset = 0x420 + self.mask + 1 + self._offsets_offset = self._checkbytes_offset + self.capacity + + # Preload the scramble and tab lists, to save on LLDB calls later on. + self._scramble = pwndbg.aglib.memory.read(self._ptr + 0x20, 0x400) + self._tab = pwndbg.aglib.memory.read(self._ptr + 0x420, self.mask + 1) + + # It is possible that the offsets array is not aligned. 
def lookup(self, key: bytes) -> int | None:
    """
    Look up the given key in the hash set.

    Returns a pointer to the stored key if it is present, None otherwise.
    """
    # In libmacho, Apple uses the checkbytes as a way to quickly reject
    # elements that are not in the list without having to compare the keys,
    # but we currently have no need for that optimization.
    slot = self._ptr + self._offsets_offset + self._index_of(key) * 4
    offset = pwndbg.aglib.memory.s32(slot)

    if offset != 0:
        # A non-zero offset is relative to the start of the hash set.
        candidate = self._ptr + offset
        if pwndbg.aglib.memory.string(candidate) == key:
            return candidate

    return None
Fortunately for us, however, when `mappingOffset` is + used for this purpose, it follows the fairly widely used pattern of using + the size of the struct to denote its version. + """ + + def __init__(self, addr: int): + self.addr = addr + + # Preload a few a few values, to speed things up later. + images_offset = 0x18 if self._header_size() <= 0x1C4 else 0x1C0 + self._images_base = self.addr + pwndbg.aglib.memory.u32(self.addr + images_offset) + self.image_count = pwndbg.aglib.memory.u32(self.addr + images_offset + 4) + + # Check whether the images are sorted by loading address. + self._images_sorted_by_address = all( + a[1] <= b[1] for a, b in itertools.pairwise(self.images) + ) + + def _header_size(self) -> int: + """ + The length of the shared cache header, in bytes. + """ + # Read `mappingOffset` (+0x10) from the structure. + return pwndbg.aglib.memory.u32(self.addr + 16) + + def mappings(self) -> Generator[DyldSharedCacheMapping]: + """ + Generate the list of memory mappings in the shared cache. + """ + if self._header_size() <= 0x138: + # This header predates `mappingWithSlideOffset` (+0x138), so use the + # regular `mappingOffset` value and regular mapping structures. Read + # the number of mapping structures from `mappingCount` (+0x14). + base = self.addr + self._header_size() + count = pwndbg.aglib.memory.u32(self.addr + 0x14) + + for i in range(count): + entry = base + i * 0x20 + yield DyldSharedCacheMapping( + pwndbg.aglib.memory.u64(entry), + pwndbg.aglib.memory.u64(entry + 8), + pwndbg.aglib.memory.u64(entry + 16), + pwndbg.aglib.memory.u32(entry + 24), + pwndbg.aglib.memory.u32(entry + 28), + ) + else: + # We can use `mappingWithSlideOffset` (+0x138) and mapping with + # slide structures for the mappings. Read the number of mapping + # structures from `mappingWithSlideCount` (+0x13c). 
@property
def size(self) -> int:
    """
    The mapped size, in bytes, of the DyLD Shared Cache.

    When the header is at least 0x18C bytes long, the value is read from
    `sharedRegionSize` (+0xe8). Otherwise it is the size of the smallest
    region that covers every mapping returned by `mappings()`.
    """
    if self._header_size() >= 0x18C:
        # Use `sharedRegionSize` (+0xe8) as the size of the entire shared
        # region.
        return pwndbg.aglib.memory.u64(self.addr + 0xE8)

    # Find the smallest region that covers all the mappings as the size.
    start = None
    end = None
    for mapping in self.mappings():
        if start is None or mapping.addr < start:
            start = mapping.addr

        # The end of a mapping is relative to its own base address, not to
        # the lowest base address seen so far.
        mapping_end = mapping.addr + mapping.size
        if end is None or mapping_end > end:
            end = mapping_end

    # Technically possible, but more likely indicates that we messed up
    # somewhere along the line when interpreting mapping information.
    assert start is not None and end is not None, "No dyld shared cache mappings?"
    assert end >= start

    return end - start
+ mapping_fileoff = pwndbg.aglib.memory.u64(mapping_ptr + 0x10) + assert ( + mapping_fileoff == 0 + ), "First mapping of the shared cache is not at the start of the shared cache" + + slide = self.base - mapping_base + assert slide >= 0, "Slide value is negative, but we don't expect it to be" + + return slide + + @property + def image_index_trie(self) -> Trie[int] | None: + """ + The trie of image indices, if available. + """ + if self._header_size() <= 0x110: + return None + + trie_unslid = pwndbg.aglib.memory.u64(self.addr + 0x108) + trie_ptr = trie_unslid + self.slide + + return Trie(trie_ptr, _uleb128_ty) + + def image_base(self, index: int): + assert self.image_count > index + + return pwndbg.aglib.memory.u64(self._images_base + index * 0x20) + + def image_name(self, index: int): + assert self.image_count > index + + return pwndbg.aglib.memory.string( + self.addr + pwndbg.aglib.memory.u32(self._images_base + index * 0x20 + 0x18) + ) + + @property + def images(self) -> Generator[Tuple[bytes, int]]: + # This is a little convoluted, but this function is quite hot and + # calling the debugger can be quite slow, so pulling in the whole array + # at once goes a really long way. + # + # Yes, even with the extra logic. Python is slow, but it's not as + # slow as calling LLDB an extra time on every iteration. + data = pwndbg.aglib.memory.read(self._images_base, 0x20 * self.image_count) + + for i in range(self.image_count): + base = i * 0x20 + yield ( + pwndbg.aglib.memory.string( + self.addr + struct.unpack(" Generator[Tuple[bytes, int]]: + "Same as images, but guaranteed to be sorted by increasing base address" + if self._images_sorted_by_address: + # The images are naturally sorted by increasing base address. + # + # This should be true the _vast_ majority of the time, and perhaps + # even all the time. Just connect the generators. + yield from self.images + else: + # The images are sorted in some other order. 
def is_address_in_shared_cache(self, addr: int) -> bool:
    """
    Whether the given address is in the shared cache.

    Returns True when `addr` lies in the half-open range that starts at
    `base` and spans `size` bytes, False otherwise.
    """
    base = self.base

    # The chained comparison short-circuits, so `size` is only queried when
    # the address is not below the base.
    return base <= addr < base + self.size
class _IdTagged:
    """
    Tagged pointer to an Objective-C object.

    This is a bit of a misnomer, as tagged pointers may not be pointers at all,
    and the data for the entire object may be contained in the payload, with no
    backing allocation in the heap. It is up to the class to determine how to
    decode the payload properly.

    Instances only store the already-decoded `tag`, `payload` and `extended`
    fields; no decoding of the raw pointer value happens in this class.
    """

    def __init__(self, tag: int, payload: int, extended: bool):
        self.tag = tag
        self.payload = payload
        self.extended = extended

    def lookup_class(self) -> Class:
        """
        Looks up the class object matching the tag in this pointer.

        Returns a `Class` built from the pointer-sized value read out of the
        tagged pointer class table.
        """
        classes = _tagged_pointer_classes()

        # Pointer arithmetic on the class table: short tags index forward from
        # its start, extended tags are additionally biased by -256.
        # NOTE(review): presumably this relies on the runtime placing a
        # 256-entry extended class table immediately before the basic one --
        # confirm against the objc4 sources.
        if self.extended:
            classes += self.tag - 256
        else:
            classes += self.tag

        # Read the selected slot, then mask off the bits removed by
        # `_ptrauth_strip` before wrapping the result.
        ptr = pwndbg.aglib.memory.read_pointer_width(int(classes.address))
        ptr = _ptrauth_strip(ptr)

        return Class(ptr)
def _isa_class_mask() -> int:
    """
    Read the pointer-sized value stored at the `objc_debug_isa_class_mask`
    symbol in the inferior.
    """
    symbol_addr = pwndbg.aglib.symbol.lookup_symbol_addr("objc_debug_isa_class_mask")
    return pwndbg.aglib.memory.read_pointer_width(symbol_addr)
+ return + + list_of_lists = _RelativeListOfLists(_ClassPropertyList, ptr & ~1) + for lst in list_of_lists.entries(): + if lst is None: + continue + yield from lst.get_list().entries() + + +class _ClassRwExtPtr: + def __init__(self, ptr: int): + self._ptr = ptr + + def ro(self) -> _ClassRoPtr: + ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr) + ptr = _ptrauth_strip(ptr) + return _ClassRoPtr(ptr) + + def methods(self) -> _ListArray[Method]: + return _ListArray(_MethodList, self._ptr + pwndbg.aglib.typeinfo.ptrsize) + + def properties(self) -> _ListArray[ClassProperty]: + return _ListArray(_ClassPropertyList, self._ptr + 2 * pwndbg.aglib.typeinfo.ptrsize) + + def demangled_name(self) -> bytes | None: + ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 4 * pwndbg.aglib.typeinfo.ptrsize) + if ptr == 0: + return None + return pwndbg.aglib.memory.string(ptr) + + def version(self) -> int: + return pwndbg.aglib.memory.u32(self._ptr + 5 * pwndbg.aglib.typeinfo.ptrsize) + + +class _ClassRwPtr: + RW_REALIZED = 1 << 31 + + def __init__(self, ptr: int): + self._ptr = ptr + + def ro_or_rw_ext(self) -> _ClassRoPtr | _ClassRwExtPtr: + ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 8) + if ptr & 1 == 1: + return _ClassRwExtPtr(ptr & ~1) + else: + return _ClassRoPtr(ptr) + + +class _ClassDataBitsPtr: + """ + Pointer to a `class_data_bits_t` structure. 
+ """ + + FAST_IS_RW_POINTER = 0x8000000000000000 + FAST_IS_SWIFT_LEGACY = 0x1 + FAST_IS_SWIFT_STABLE = 0x2 + FAST_HAS_DEFAULT_RR = 0x4 + + FAST_DATA_MASK = 0x0F007FFFFFFFFFF8 + + def __init__(self, ptr: int): + self._ptr = ptr + + def data(self) -> _ClassRoPtr | _ClassRwPtr: + if self._is_rw(): + return _ClassRwPtr(self._data_addr()) + + return _ClassRoPtr(self._data_addr()) + + def _is_rw(self) -> bool: + return ((self._ptr & _ClassDataBitsPtr.FAST_IS_RW_POINTER) != 0) or ( + (self._flags() & _ClassRwPtr.RW_REALIZED) != 0 + ) + + def _data_addr(self) -> int: + return _ptrauth_strip(self._ptr) & _ClassDataBitsPtr.FAST_DATA_MASK + + def _flags(self) -> int: + return pwndbg.aglib.memory.u32(self._data_addr()) + + +class _EntList(Generic[T]): + """ + Entity list. + """ + + _flags_mask: int = 0 + "Mask for the flag bits of `entsizeAndFlags`" + + def __init__(self, ptr: int): + self._addr = self._addr_from_ptr(ptr) + self._ptr = ptr + + def _entsize_and_flags(self) -> int: + return pwndbg.aglib.memory.u32(self._addr) + + def _entries(self) -> int: + return pwndbg.aglib.memory.u32(self._addr + 4) + + def flags(self) -> int: + return self._entsize_and_flags() & self._flags_mask + + def entsize(self) -> int: + return self._entsize_and_flags() & ~self._flags_mask + + def _modify_pointer(self, ptr: int) -> int: + return ptr + + def _from_ptr(self, ptr: int) -> T: + """ + Build the type of this list from a pointer. + + Must be implemented by the specialized class. + """ + raise NotImplementedError() + + def _addr_from_ptr(self, ptr: int) -> int: + """ + Strip any metadata from the pointer to this list. + + Must be implemented by the specialized class. 
class _ListArray(Generic[T]):
    """
    A runtime-polymorphic array type for lists. May be a pointer to a list type,
    an array of pointers, or a _RelativeListOfLists, distinguished by a tag in
    a pointer.

    Strangely for Apple, the tagged pointer to the final list is contained
    inside the list array structure, rather than having the whole structure be
    inlined into a pointer value. Suspiciously sane.

    This corresponds to the `list_array_tt` type in libobjc.
    """

    def __init__(self, ty: Callable[[int], _EntList[T]], ptr: int):
        self._ptr = ptr
        self._ty = ty

    def entries(self) -> Generator[T]:
        """
        Yield every element of every list reachable from this array.

        The low two bits of the pointer-sized value stored at `self._ptr`
        select the layout; the remaining bits are the address. Nothing is
        yielded when the address is zero or the tag is not 0, 1 or 2.
        """
        raw_ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr)

        tag = raw_ptr & 3
        ptr = raw_ptr & ~3

        if ptr == 0:
            return

        if tag == 0:
            # This is just a pointer to the list.
            yield from self._ty(ptr).entries()
        elif tag == 1:
            # This is an array of lists: a 32-bit count, followed at offset 8
            # by that many list pointers.
            count = pwndbg.aglib.memory.u32(ptr)
            for i in range(count):
                yield from self._ty(
                    pwndbg.aglib.memory.read_pointer_width(
                        ptr + 8 + i * pwndbg.aglib.typeinfo.ptrsize
                    )
                ).entries()
        elif tag == 2:
            # This is a relative list of lists.
            for entry in _RelativeListOfLists(self._ty, ptr).entries():
                if entry is None:
                    # `_RelativeListOfLists` yields None for entries whose
                    # image is not marked as loaded; there is no list to
                    # follow for those.
                    continue
                yield from entry.get_list().entries()
+ """ + return pwndbg.aglib.symbol.lookup_symbol("objc_debug_taggedpointer_classes").cast( + pwndbg.aglib.typeinfo.void.pointer().pointer() + ) + + +def _ptr_obfuscation_value() -> int: + """ + The Objective-C runtime obfuscates tagged pointer values. + """ + return pwndbg.aglib.memory.read_pointer_width( + pwndbg.aglib.symbol.lookup_symbol_addr("objc_debug_taggedpointer_obfuscator") + ) + + +def _try_decode_tagged_split(ptr: int) -> _IdTagged | None: + """ + Decodes a tagged pointer encoded in the split-tag scheme, if it is tagged. + + This is the encoding scheme used in modern - iOS 14 and newer - ARM64 + platforms. + + If the pointer is not tagged, returns `None`. + """ + if ptr & 0x8000000000000000 == 0: + # Not a tagged pointer. + return None + + if ptr & 7 == 7: + # This is an extended tag with a 52-bit payload. + tag = (ptr >> 55) & 0xFF + payload = (ptr >> 3) & 0xFFFFFFFFFFFFF + extended = True + else: + # This is a short tag with a 60-bit payload. + tag = ptr & 7 + payload = (ptr >> 3) & 0xFFFFFFFFFFFFFFF + extended = False + + return _IdTagged(tag, payload, extended) + + +def _try_decode_tagged_lsb(ptr: int) -> _IdTagged: + """ + Decodes a tagged pointer encoded in the LSB-tag scheme, if it is tagged. + + This is the encoding scheme used in all x86-64 versions of Darwin. + + If the pointer is not tagged, returns `None`. + """ + if ptr & 1 == 0: + # Not a tagged pointer. + return None + + if ptr & 14 == 14: + # This is an extended tag with a 52-bit payload. + tag = (ptr >> 4) & 0xFF + payload = ptr >> 12 + extended = True + else: + # This is a short tage with a 60-bit payload. + tag = (ptr >> 1) & 7 + payload = ptr >> 4 + extended = False + + return _IdTagged(tag, payload, extended) + + +def _decode_prog_id(ptr: int) -> _IdRaw | _IdTagged: + """ + Given an Objective-C program, decode it. + """ + + # First, check for tagged pointers. 
+ tagged = None + match pwndbg.aglib.arch.name: + case "aarch64": + tagged = _try_decode_tagged_split(ptr) + case "x86-64": + tagged = _try_decode_tagged_lsb(ptr) + case other: + raise AssertionError(f"Unexpected Objective-C architecture: {other}") + if tagged is not None: + # Successfuly decoded the tagged pointer. + return tagged + + # This is a direct pointer. + return _IdRaw(ptr) + + +def _ptrauth_strip(ptr: int) -> int: + """ + Strip pointer signing information from a given signed pointer. + """ + return ptr & 0xFFFFFFFFFFFF + + +class Object: + _addr: int + "Object pointer value, as seen in the program. May be tagged, obfuscated, authenticated." + + _id: _IdRaw | _IdTagged + "Decoded object pointer value. May be tagged." + + def __init__(self, addr: int): + self._addr = addr + self._id = _decode_prog_id(addr) + + @property + def cls(self) -> Class | None: + if isinstance(self._id, _IdRaw): + isa = _IsaPtr(self._id.addr) + return isa.get_class() + elif isinstance(self._id, _IdTagged): + return self._id.lookup_class() + + +class Class(Object): + def __init__(self, addr: int): + super().__init__(addr) + assert isinstance(self._id, _IdRaw), "Class pointers are never tagged" + + def _data_bits(self) -> _ClassDataBitsPtr: + # MyPy fails if we don't check this a second time. 
+ assert isinstance(self._id, _IdRaw), "Class pointers are never tagged" + ptr = pwndbg.aglib.memory.read_pointer_width(self._id.addr + 32) + ptr = _ptrauth_strip(ptr) + return _ClassDataBitsPtr(ptr) + + def _ro(self) -> _ClassRoPtr: + data = self._data_bits().data() + if isinstance(data, _ClassRoPtr): + return data + elif isinstance(data, _ClassRwPtr): + ro_or_rw_ext = data.ro_or_rw_ext() + if isinstance(ro_or_rw_ext, _ClassRwExtPtr): + return ro_or_rw_ext.ro() + elif isinstance(ro_or_rw_ext, _ClassRoPtr): + return ro_or_rw_ext + else: + # FIXME: Should be `typing.assert_never`, needs Python 3.11 + assert False + else: + # FIXME: Should be `typing.assert_never`, needs Python 3.11 + assert False + + def _rw_ext(self) -> _ClassRwExtPtr | None: + data = self._data_bits().data() + if isinstance(data, _ClassRoPtr): + return None + elif isinstance(data, _ClassRwPtr): + ro_or_rw_ext = data.ro_or_rw_ext() + if isinstance(ro_or_rw_ext, _ClassRwExtPtr): + return ro_or_rw_ext + elif isinstance(ro_or_rw_ext, _ClassRoPtr): + return None + else: + # FIXME: Should be `typing.assert_never`, needs Python 3.11 + assert False + else: + # FIXME: Should be `typing.assert_never`, needs Python 3.11 + assert False + + @property + def superclass(self) -> Class | None: + # MyPy fails if we don't check this a second time. + assert isinstance(self._id, _IdRaw), "Class pointers are never tagged" + + if self._ro().flags() & _ClassRoPtr.RO_ROOT != 0: + # This is a root class, and thus has no superclass. + return None + + ptr_addr = self._id.addr + pwndbg.aglib.typeinfo.ptrsize + ptr = pwndbg.aglib.memory.read_pointer_width(ptr_addr) + ptr = _ptrauth_strip(ptr) + + return Class(ptr) + + @property + def name(self) -> bytes: + return self._ro().name() + + @property + def methods(self) -> Generator[Method]: + if (rw_ext := self._rw_ext()) is not None: + # Return the methods added to the class at runtime from the Class + # R/W structure, which also include the base methods. 
+ yield from rw_ext.methods().entries() + else: + # Return the base methods. + yield from self._ro().methods() + + @property + def ivars(self) -> Generator[InstanceVariable]: + yield from self._ro().ivars() + + @property + def properties(self) -> Generator[ClassProperty]: + if (rw_ext := self._rw_ext()) is not None: + # Return the properties added to the class at runtime from the Class + # R/W structure, which also include the base properties. + yield from rw_ext.properties().entries() + else: + # Return the base properties. + yield from self._ro().properties() + + @property + def is_metaclass(self) -> bool: + return (self._ro().flags() & _ClassRoPtr.RO_META) != 0 + + @override + @property + def cls(self) -> Class | None: + if self.is_metaclass: + # Following this pointer in metaclasses is weird. Users are better + # served following the superclass chain, instead. + return None + return super().cls + + +class InstanceVariable: + """ + An Objective-C Instance Variable. + + Instance Variables are NOT objects! + """ + + def __init__(self, ptr: int): + self._ptr = ptr + + @property + def offset(self) -> int: + """ + The offset in bytes of this value from the start of the object instance. + """ + return pwndbg.aglib.memory.s32(pwndbg.aglib.memory.read_pointer_width(self._ptr)) + + @property + def name(self) -> bytes: + """ + The name of this instance variable. + """ + return pwndbg.aglib.memory.string( + pwndbg.aglib.memory.read_pointer_width(self._ptr + pwndbg.aglib.typeinfo.ptrsize) + ) + + @property + def typename(self) -> bytes: + """ + The name of the type of this instance variable. + """ + return pwndbg.aglib.memory.string( + pwndbg.aglib.memory.read_pointer_width(self._ptr + pwndbg.aglib.typeinfo.ptrsize * 2) + ) + + @property + def alignment(self) -> int: + """ + The alignment of this instance variable, in bytes. 
+ """ + align_log2 = pwndbg.aglib.memory.u32(self._ptr + pwndbg.aglib.typeinfo.ptrsize * 3) + + # All ones indicates the natural alignment of a pointer. + if align_log2 == 0xFFFFFFFF: + return pwndbg.aglib.typeinfo.ptrsize + + return 1 << align_log2 + + @property + def size(self) -> int: + """ + The size of this instance variable, in bytes. + """ + return pwndbg.aglib.memory.u32(self._ptr + pwndbg.aglib.typeinfo.ptrsize * 3 + 4) + + +class ClassProperty: + def __init__(self, ptr: int): + self._ptr = ptr + + @property + def name(self) -> bytes: + """ + The name of this class property. + """ + return pwndbg.aglib.memory.string(pwndbg.aglib.memory.read_pointer_width(self._ptr)) + + @property + def value(self) -> bytes: + """ + The value of this property. + """ + return pwndbg.aglib.memory.string( + pwndbg.aglib.memory.read_pointer_width(self._ptr + pwndbg.aglib.typeinfo.ptrsize) + ) + + +class Selector: + """ + An Objective-C Selector. + + Selectors are NOT objects! + """ + + def __init__(self, ptr: int): + self._ptr = ptr + + @property + def name(self) -> bytes: + """ + Retrieves the name of this selector. + """ + + # In the Apple Objective-C runtime, selectors are human-readable strings + # with unique identities[1]. The identity is simply the pointer to the + # string itself, guaranteed by the tooling to be unique. To read the + # name of the selector, then, we can simply follow its identity pointer. + # + # [1]: https://web.archive.org/web/20161010081824/http://unixjunkie.blogspot.com/2006/02/nil-and-nil.html + return pwndbg.aglib.memory.string(self._ptr) + + +class Method: + """ + An Objective-C Method Pointer. + + Methods are NOT objects! + + A method pointer can be one of three types: Small, small direct, and big. + + Pointer types are distinguished by the two least significant bits in the + integer representation of the pointer. 
A value of `1` is used for both small + pointer types, while all other values are used to distinguish between the + signing nuances of big pointers. + + Small pointers 32-bit wide and relative to a given base value. Big pointers + contain the pointers themselves, and they may or may not be signed. + + Small direct pointers are small pointers that reside in the shared cache, and + their selectors are relative to @selector(🤯), while the selectors of regular + small pointers are relative to the pointers themselves. + """ + + def __init__(self, ptr: int): + self._ptr = ptr + + @property + def sel(self) -> Selector: + "The selector this method responds to." + kind = self._ptr & 3 + base = self._ptr & ~3 + if kind == 1: + if pwndbg.aglib.macho.shared_cache().is_address_in_shared_cache(base): + # To resolve selectors of small method pointers in the shared cache, + # we have to look up the identity of @selector(🤯). + rel = ( + pwndbg.aglib.macho.shared_cache() + .objc_builtin_selectors() + .lookup("🤯".encode("utf-8")) + ) + ptr = rel + pwndbg.aglib.memory.s32(base) + else: + offset = pwndbg.aglib.memory.s32(base) + ref = base + offset + + # Non-shared cache values are pointers to selectors. + ptr = pwndbg.aglib.memory.read_pointer_width(ref) + + return Selector(ptr) + else: + return Selector(_ptrauth_strip(pwndbg.aglib.memory.read_pointer_width(base))) + + @property + def types(self) -> bytes: + "The types of the arguments to this method." + kind = self._ptr & 3 + base = self._ptr & ~3 + if kind == 1: + ptr = base + 4 + offset = pwndbg.aglib.memory.s32(ptr) + addr = ptr + offset + else: + ptr = base + 8 + addr = _ptrauth_strip( + pwndbg.aglib.memory.read_pointer_width(base + pwndbg.aglib.typeinfo.ptrsize) + ) + + return pwndbg.aglib.memory.string(addr) + + @property + def imp(self) -> int: + "The pointer to the function that implements this method." + kind = self._ptr & 3 + base = self._ptr & ~3 + if kind == 1: + # There's a bit of nuance here. 
+ # + # Method swizzling for small pointers is implemented using a global + # hash map of method pointers to implementation pointers. When + # getting the IMP pointer for a small pointer, the runtime will + # first check the global hash map to see if the method has been + # swizzled, and return the swizzled method if it has. The runtime + # will do what we do here if method has not been swizzled. + # + # Currently, we have no good way to query this map, and no other way + # to detect that a method has been swizzled, so swizzles to small + # pointers are unfortunately compeltely invisible to us. + # + # TODO: Handle method swizzles for small-pointer-type Objective-C methods. + ptr = base + 8 + offset = pwndbg.aglib.memory.s32(ptr) + return ptr + offset + else: + ptr = base + 16 + return _ptrauth_strip( + pwndbg.aglib.memory.read_pointer_width(base + pwndbg.aglib.typeinfo.ptrsize) + ) + + +class _MethodList(_EntList[Method]): + """ + Method entity list. + """ + + _flags_mask = 0xFFFF0003 + + SMALL_METHOD_LIST_FLAG = 0x80000000 + "Indicates that the pointers in this list are small method pointers." + + BIG_SIGNED_METHOD_LIST_FLAG = 0x8000000000000000 + """ + Indicates that the pointers in this list are big and signed. + + Stored as part of the pointer to the method list, rather than in the flags + field, as is the case with other flags. + """ + + @override + def _modify_pointer(self, ptr: int) -> int: + if self.flags() & self.SMALL_METHOD_LIST_FLAG != 0: + # This is a small pointer list. + return (ptr & ~3) | 1 + elif self._ptr & self.BIG_SIGNED_METHOD_LIST_FLAG: + # This is a big signed poitner list. + return (ptr & ~3) | 2 + else: + # No tag or flag. This is a big pointer list. + return ptr & ~3 + + @override + def _addr_from_ptr(self, ptr: int) -> int: + # Top-Byte-Ignore is assumed for method lists, but method list pointers + # may have metadata attached to them. 
+ return ptr & ~0xFF00000000000000 + + @override + def _from_ptr(self, ptr: int) -> Method: + return Method(ptr) + + +class _IVarList(_EntList[InstanceVariable]): + "IVar entity list." + + _flags_mask = 0 + + @override + def _modify_pointer(self, ptr: int) -> int: + return ptr + + @override + def _addr_from_ptr(self, ptr: int) -> int: + return ptr + + @override + def _from_ptr(self, ptr: int) -> InstanceVariable: + return InstanceVariable(ptr) + + +class _ClassPropertyList(_EntList[ClassProperty]): + "Class property entity list." + + _flags_mask = 0 + + @override + def _modify_pointer(self, ptr: int) -> int: + return ptr + + @override + def _addr_from_ptr(self, ptr: int) -> int: + return ptr + + @override + def _from_ptr(self, ptr: int) -> ClassProperty: + return ClassProperty(ptr)