Add initial support for Objective-C and Mach-O on Darwin (#3249)

* Initial Apple Objective-C ABI support

* Add support for instance variables and class properties

* Add support for read-write object properties

* Add trie parsing and image listing support for Mach-O

* Add sorted iterator to DYLD Shared Cache image listing

* Address feedback
pull/3258/head
Matt. 4 months ago committed by GitHub
parent 260a7204a7
commit fa566efa1c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -29,9 +29,11 @@ def load_aglib():
import pwndbg.aglib.heap import pwndbg.aglib.heap
import pwndbg.aglib.kernel import pwndbg.aglib.kernel
import pwndbg.aglib.kernel.vmmap import pwndbg.aglib.kernel.vmmap
import pwndbg.aglib.macho
import pwndbg.aglib.memory import pwndbg.aglib.memory
import pwndbg.aglib.nearpc import pwndbg.aglib.nearpc
import pwndbg.aglib.next import pwndbg.aglib.next
import pwndbg.aglib.objc
import pwndbg.aglib.onegadget import pwndbg.aglib.onegadget
import pwndbg.aglib.proc import pwndbg.aglib.proc
import pwndbg.aglib.qemu import pwndbg.aglib.qemu

@ -0,0 +1,668 @@
from __future__ import annotations
import itertools
import struct
from typing import Callable
from typing import Generator
from typing import Generic
from typing import Tuple
from typing import TypeVar
import pwndbg
import pwndbg.aglib.memory
def _uleb128(ptr: int) -> Tuple[int, int]:
    """
    Decode the ULEB128 value stored at address `ptr`.

    Returns a tuple of (value, length), where `value` is the decoded number and
    `length` is the number of bytes its encoding occupies in memory.
    """
    value = 0
    shift = 0
    cursor = ptr

    while True:
        byte = pwndbg.aglib.memory.u8(cursor)
        cursor += 1

        # The low seven bits of each byte carry payload, least-significant
        # group first.
        value |= (byte & 0x7F) << shift

        if not byte & 0x80:
            # A clear high bit marks the final byte of the encoding.
            return value, cursor - ptr

        shift += 7
class _RawTrie:
"""
This is the untyped base implementation of Trie.
"""
def __init__(self, ptr: int):
self._ptr = ptr
def _walk(
self,
offset: int,
acc: bytes,
edgesel: Callable[[bytes, bytes], bool],
nodesel: Callable[[bytes], bool],
) -> Generator[Tuple[bytes, int, int]]:
"""
Walk the trie.
Allows callers to select edges for exploration and nodes for yielding
through the `edgesel` and `nodesel` callables.
At every edge, this function will call `edgesel` with the currently
accumulated name and the name associated with the edge, and will take
action according to the value it returns. If it returns True, that edge
will be explored, otherwise, the edge will be ignored.
At every node, this function will call `nodesel` with the currently
accumulated name. If it returns True, the node will be yielded,
otherwise, it will be ignored.
Yielded node information consists of a tuple of (name, ptr, length),
where `name` is the name of the node, `ptr` is the address of the first
byte of its associated data, and `length` is the length of its
associated data, in bytes.
"""
base = self._ptr + offset
node_data_len, node_data_len_len = _uleb128(base)
if node_data_len != 0 and nodesel(acc):
# The user selected this node, stop the walk here.
yield acc, base + node_data_len_len, node_data_len
cursor = base + node_data_len_len + node_data_len
# The number of children is NOT a ULEB128.
children = pwndbg.aglib.memory.u8(cursor)
cursor += 1
for _ in range(children):
name = pwndbg.aglib.memory.string(cursor)
cursor += len(name) + 1
child_offset, child_offset_len = _uleb128(cursor)
cursor += child_offset_len
if edgesel(acc, name):
yield from self._walk(child_offset, acc + name, edgesel, nodesel)
# The cursor is already at the next child.
def _get_raw(self, name: bytes) -> Tuple[bytes, int, int] | None:
"""
Get the data associated with the node of given name, if it exists.
"""
def nodesel(candidate: bytes) -> bool:
return candidate == name
def edgesel(acc: bytes, candidate: bytes) -> bool:
return name[len(acc) :].startswith(candidate)
return next(self._walk(0, b"", edgesel, nodesel), None)
def _entries_raw(self) -> Generator[Tuple[bytes, int, int]]:
"""
List all the entries in the trie, along with their associated data.
"""
yield from self._walk(0, b"", lambda _acc, _candidate: True, lambda _candidate: True)
def keys(self) -> Generator[bytes]:
"""
List the name of all nodes in the trie.
"""
yield from (name for name, _ptr, _size in self._entries_raw())
# Type produced by a Trie's `ty` callable when decoding a node's associated data.
T = TypeVar("T")
class Trie(_RawTrie, Generic[T]):
    """
    Prefix Tree

    The Mach-O format makes extensive use of prefix trees for any operation that
    involves string-based lookup.

    The `ty` callable given at construction is called with the address and the
    length, in bytes, of the data associated with a node, and returns the
    decoded value for that node.
    """

    def __init__(self, ptr: int, ty: Callable[[int, int], T]):
        super().__init__(ptr)
        self._ty = ty

    def get(self, name: bytes) -> T | None:
        """
        Get the data associated with the node of given name, if it exists.

        Returns None when the trie has no node with that name.
        """
        found = self._get_raw(name)
        if found is None:
            # `_get_raw` reports a missing name as None, which must not be
            # unpacked as if it were a node tuple.
            return None

        _, ptr, size = found
        return self._ty(ptr, size)

    def entries(self) -> Generator[Tuple[bytes, T]]:
        """
        List all the entries in the trie, along with their associated data.
        """
        yield from ((name, self._ty(ptr, size)) for name, ptr, size in self._entries_raw())
def _uleb128_ty(ptr: int, size: int) -> int:
    """
    The type function of ULEB128 associated data, for use with Trie.

    Decodes the ULEB128 at `ptr` and checks that its encoding is exactly `size`
    bytes long.
    """
    decoded = _uleb128(ptr)

    # Can fail if the type is wrong or the trie is corrupted.
    assert size == decoded[1], "Size mismatch while validating ULEB128"

    return decoded[0]
class DyldSharedCacheMapping:
    """
    Record describing a single mapping of the DyLD Shared Cache.

    Holds the address, size and file offset of the mapping, along with its
    maximum and initial protection values.
    """

    def __init__(self, addr: int, size: int, file_offset: int, max_prot: int, init_prot: int):
        self.addr, self.size = addr, size
        self.file_offset = file_offset
        self.max_prot, self.init_prot = max_prot, init_prot
def _lookup8(blob: bytes, level: int) -> int:
    """
    Hashes a variable-length byte array into a 64-bit integer.

    Apple uses a variation of an algorithm published by Bob Jenkins in 1997 on
    Dr. Dobb's Journal, and later republished on their website under the title
    "The Hash"[1]. The version used by Apple was also written by Jenkins[2], but
    does not seem to be mentioned in any of their articles, so I couldn't
    gather much information about it besides that it looks like a 64-bit variant
    of the algorithm in the article.

    This function is a Python port of the algorithm in [2].

    `blob` is the key to hash and `level` is the seed (the previous hash, or an
    arbitrary value). All arithmetic wraps at 64 bits, as it does in the C
    original.

    [1]: https://burtleburtle.net/bob/hash/doobs.html
    [2]: https://burtleburtle.net/bob/c/lookup8.c
    """
    mask = 0xFFFFFFFFFFFFFFFF

    data = bytes(blob)
    length = len(data)

    a = level & mask
    b = level & mask
    c = 0x9E3779B97F4A7C13

    # Consume the key in whole 24-byte blocks, reading three little-endian
    # 64-bit words from each of them.
    offset = 0
    while length - offset >= 24:
        a = (a + int.from_bytes(data[offset : offset + 8], "little")) & mask
        b = (b + int.from_bytes(data[offset + 8 : offset + 16], "little")) & mask
        c = (c + int.from_bytes(data[offset + 16 : offset + 24], "little")) & mask
        a, b, c = _mix64(a, b, c)
        offset += 24

    # Fold in the last 0 to 23 bytes, behaving as if they were padded out with
    # zeroes. `int.from_bytes` of a short (or empty) slice does that padding
    # for us.
    #
    # The lowest byte of `c` is reserved for the length of the whole key, so
    # the tail bytes that land in `c` are shifted up by one byte.
    tail = data[offset:]
    a = (a + int.from_bytes(tail[:8], "little")) & mask
    b = (b + int.from_bytes(tail[8:16], "little")) & mask
    c = (c + length + (int.from_bytes(tail[16:], "little") << 8)) & mask

    # The final mix always happens, even when there were no tail bytes. The
    # operands are wrapped to 64 bits before mixing, so that adding the length
    # to `c` can never leak a 65th bit into the mix.
    a, b, c = _mix64(a, b, c)

    return c
def _mix64(a: int, b: int, c: int) -> tuple[int, int, int]:
"""
Mix 3 64-bit values reversibly.
This function is part of the Python port of Bob Jenkin's hash algorithm, as
detailed in `_lookup8`.
"""
a -= b
a -= c
a ^= c >> 43
a %= 0x10000000000000000
b -= c
b -= a
b ^= a << 9
b %= 0x10000000000000000
c -= a
c -= b
c ^= b >> 8
c %= 0x10000000000000000
a -= b
a -= c
a ^= c >> 38
a %= 0x10000000000000000
b -= c
b -= a
b ^= a << 23
b %= 0x10000000000000000
c -= a
c -= b
c ^= b >> 5
c %= 0x10000000000000000
a -= b
a -= c
a ^= c >> 35
a %= 0x10000000000000000
b -= c
b -= a
b ^= a << 49
b %= 0x10000000000000000
c -= a
c -= b
c ^= b >> 11
c %= 0x10000000000000000
a -= b
a -= c
a ^= c >> 12
a %= 0x10000000000000000
b -= c
b -= a
b ^= a << 18
b %= 0x10000000000000000
c -= a
c -= b
c ^= b >> 22
c %= 0x10000000000000000
return a, b, c
class DyldSharedCacheHashSet:
    """
    A hash set from the DyLD Shared Cache.

    The DyLD Shared Cache uses hash sets in all structures related to Objective-C
    Optimization. This class is an interface to them.

    The fields read from the structure at `ptr` are:
      - +0x04: `capacity`, the number of slots in the offsets array (u32)
      - +0x0C: `shift` (u32)
      - +0x10: `mask` (u32)
      - +0x18: `salt` (u64)
      - +0x20: the scramble list (0x400 bytes)
      - +0x420: the tab list (`mask + 1` bytes), followed by the check bytes
        (`capacity` bytes) and the offsets array (`capacity` 32-bit entries)
    """

    def __init__(self, ptr: int):
        self._ptr = ptr
        self.capacity = pwndbg.aglib.memory.u32(self._ptr + 0x04)
        self.shift = pwndbg.aglib.memory.u32(self._ptr + 0x0C)
        self.mask = pwndbg.aglib.memory.u32(self._ptr + 0x10)
        self.salt = pwndbg.aglib.memory.u64(self._ptr + 0x18)

        # Mask must always be one minus a power of two. If this fails, it hints
        # that we loaded from an invalid address.
        assert (self.mask + 1).bit_count() == 1

        # Name the offsets of elements in the dynamically-sized portion of the
        # structure (which starts at 0x420).
        self._checkbytes_offset = 0x420 + self.mask + 1
        self._offsets_offset = self._checkbytes_offset + self.capacity

        # Preload the scramble and tab lists, to save on LLDB calls later on.
        self._scramble = pwndbg.aglib.memory.read(self._ptr + 0x20, 0x400)
        self._tab = pwndbg.aglib.memory.read(self._ptr + 0x420, self.mask + 1)

        # It is possible that the offsets array is not aligned. The code in
        # libmacho does not seem to care about this condition, but we should
        # probably watch out if it ever does arise in a real-world scenario.
        assert self._offsets_offset % 4 == 0, "Unaligned offset array in Mach-O perfect hash map"

    def _index_of(self, key: bytes) -> int:
        """
        Compute the slot of the offsets array that `key` hashes to.

        The result is only meaningful for keys that are in the set. For any
        other key, it is an arbitrary 32-bit value.
        """
        lookup = _lookup8(key, self.salt)
        tab = lookup & self.mask
        tabbed = self._tab[tab]
        scrambled = struct.unpack("<I", self._scramble[tabbed * 4 : (tabbed + 1) * 4])[0]
        return ((lookup >> self.shift) % 0x100000000) ^ scrambled

    def lookup(self, key: bytes) -> int | None:
        """
        Look up the given key in the hash set.

        Returns a pointer to the key if it is present, None otherwise.
        """
        index = self._index_of(key)
        if index >= self.capacity:
            # Keys that are not in the set may hash to a slot past the end of
            # the offsets array. Reading it would interpret unrelated memory as
            # an offset, so reject the key right away.
            return None

        # In libmacho, Apple uses the checkbytes as a way to quickly reject
        # elements that are not in the list without having to compare the keys,
        # but we currently have no need for that optimization.
        offset = pwndbg.aglib.memory.s32(self._ptr + self._offsets_offset + index * 4)
        if offset == 0:
            return None

        ptr = self._ptr + offset
        val = pwndbg.aglib.memory.string(ptr)
        if val != key:
            return None

        return ptr

    def keys(self) -> Generator[bytes]:
        """
        Returns an iterator over all the keys present in the hash set.
        """
        if self.capacity == 0:
            return

        # Pull in the whole offsets array with a single read, rather than
        # calling into the debugger once for every slot.
        raw = pwndbg.aglib.memory.read(self._ptr + self._offsets_offset, self.capacity * 4)
        for i in range(self.capacity):
            (offset,) = struct.unpack("<i", raw[i * 4 : (i + 1) * 4])
            if offset == 0:
                # Empty slot.
                continue
            yield pwndbg.aglib.memory.string(self._ptr + offset)
class DyldSharedCache:
    """
    Handle to the DyLD Shared Cache in the address space of the inferior.

    The shared cache format handling code in libmacho has multiple paths for
    gathering the same information, depending on a value that is near the
    beginning of the header, which indicates that the format has likely evolved
    quite a bit since its first introduction.

    The way the version of a given shared cache is determined isn't exactly
    straightforward, and relies on a combination of the `magic` and
    `mappingOffset` values. Fortunately for us, however, when `mappingOffset` is
    used for this purpose, it follows the fairly widely used pattern of using
    the size of the struct to denote its version.
    """

    def __init__(self, addr: int):
        self.addr = addr

        # Preload a few values, to speed things up later.
        #
        # Headers of up to 0x1C4 bytes carry the image array offset and count at
        # +0x18 and +0x1C. Larger headers carry them at +0x1C0 and +0x1C4.
        images_offset = 0x18 if self._header_size() <= 0x1C4 else 0x1C0
        self._images_base = self.addr + pwndbg.aglib.memory.u32(self.addr + images_offset)
        self.image_count = pwndbg.aglib.memory.u32(self.addr + images_offset + 4)

        # Check whether the images are sorted by loading address.
        self._images_sorted_by_address = all(
            a[1] <= b[1] for a, b in itertools.pairwise(self.images)
        )

    def _header_size(self) -> int:
        """
        The length of the shared cache header, in bytes.
        """
        # Read `mappingOffset` (+0x10) from the structure.
        return pwndbg.aglib.memory.u32(self.addr + 16)

    def mappings(self) -> Generator[DyldSharedCacheMapping]:
        """
        Generate the list of memory mappings in the shared cache.
        """
        if self._header_size() <= 0x138:
            # This header predates `mappingWithSlideOffset` (+0x138), so use the
            # regular `mappingOffset` value and regular mapping structures. Read
            # the number of mapping structures from `mappingCount` (+0x14).
            base = self.addr + self._header_size()
            count = pwndbg.aglib.memory.u32(self.addr + 0x14)
            for i in range(count):
                entry = base + i * 0x20
                yield DyldSharedCacheMapping(
                    pwndbg.aglib.memory.u64(entry),
                    pwndbg.aglib.memory.u64(entry + 8),
                    pwndbg.aglib.memory.u64(entry + 16),
                    pwndbg.aglib.memory.u32(entry + 24),
                    pwndbg.aglib.memory.u32(entry + 28),
                )
        else:
            # We can use `mappingWithSlideOffset` (+0x138) and mapping with
            # slide structures for the mappings. Read the number of mapping
            # structures from `mappingWithSlideCount` (+0x13c).
            base = self.addr + pwndbg.aglib.memory.u32(self.addr + 0x138)
            count = pwndbg.aglib.memory.u32(self.addr + 0x13C)
            for i in range(count):
                entry = base + i * 0x38
                yield DyldSharedCacheMapping(
                    pwndbg.aglib.memory.u64(entry),
                    pwndbg.aglib.memory.u64(entry + 8),
                    pwndbg.aglib.memory.u64(entry + 16),
                    pwndbg.aglib.memory.u32(entry + 48),
                    pwndbg.aglib.memory.u32(entry + 52),
                )

    @property
    def base(self) -> int:
        """
        The base virtual address of the DyLD Shared Cache.
        """
        return self.addr

    @staticmethod
    def _covering_size(extents: list[Tuple[int, int]]) -> int:
        """
        Size of the smallest region that covers all the given (address, size)
        extents, regardless of the order they are given in.
        """
        # Technically possible, but more likely indicates that we messed up
        # somewhere along the line when interpreting mapping information.
        assert extents, "No dyld shared cache mappings?"

        start = min(addr for addr, _size in extents)
        # The end of every extent is relative to its own address, not to the
        # lowest address seen so far.
        end = max(addr + size for addr, size in extents)
        assert end >= start

        return end - start

    @property
    def size(self) -> int:
        """
        The mapped size, in bytes, of the DyLD Shared Cache.
        """
        if self._header_size() >= 0x18C:
            # Use `sharedRegionSize` (+0xe8) as the size of the entire shared
            # region.
            return pwndbg.aglib.memory.u64(self.addr + 0xE8)

        # Find the smallest region that covers all the mappings as the size.
        return self._covering_size([(m.addr, m.size) for m in self.mappings()])

    @property
    def slide(self) -> int:
        """
        The slide value of the DyLD Shared Cache, in bytes.

        This is the difference between the address the cache is loaded at and
        the address recorded in its first mapping structure.
        """
        mapping_ptr = self.base + self._header_size()
        mapping_base = pwndbg.aglib.memory.u64(mapping_ptr)

        # Make sure this is the start of the shared cache.
        #
        # Again, technically possible, but this breaks compatibility in a way
        # that we have no idea how to deal with. Better to fail and figure out
        # we're doing something wrong than have to track a random bug back to
        # this point.
        mapping_fileoff = pwndbg.aglib.memory.u64(mapping_ptr + 0x10)
        assert (
            mapping_fileoff == 0
        ), "First mapping of the shared cache is not at the start of the shared cache"

        slide = self.base - mapping_base
        assert slide >= 0, "Slide value is negative, but we don't expect it to be"

        return slide

    @property
    def image_index_trie(self) -> Trie[int] | None:
        """
        The trie of image indices, if available.

        Returns None when the header is 0x110 bytes long or shorter.
        """
        if self._header_size() <= 0x110:
            return None

        # The address at +0x108 does not have the slide applied to it.
        trie_unslid = pwndbg.aglib.memory.u64(self.addr + 0x108)
        trie_ptr = trie_unslid + self.slide

        return Trie(trie_ptr, _uleb128_ty)

    def image_base(self, index: int):
        """
        The base address recorded for the image with the given index.
        """
        assert self.image_count > index
        return pwndbg.aglib.memory.u64(self._images_base + index * 0x20)

    def image_name(self, index: int):
        """
        The name string recorded for the image with the given index.
        """
        assert self.image_count > index
        return pwndbg.aglib.memory.string(
            self.addr + pwndbg.aglib.memory.u32(self._images_base + index * 0x20 + 0x18)
        )

    @property
    def images(self) -> Generator[Tuple[bytes, int]]:
        """
        Generate the (name, base address) pair of every image, in the order the
        images are stored in.
        """
        # This is a little convoluted, but this function is quite hot and
        # calling the debugger can be quite slow, so pulling in the whole array
        # at once goes a really long way.
        #
        # Yes, even with the extra logic. Python is slow, but it's not as
        # slow as calling LLDB an extra time on every iteration.
        data = pwndbg.aglib.memory.read(self._images_base, 0x20 * self.image_count)
        for i in range(self.image_count):
            base = i * 0x20
            yield (
                pwndbg.aglib.memory.string(
                    self.addr + struct.unpack("<I", data[base + 0x18 : base + 0x1C])[0]
                ),
                struct.unpack("<Q", data[base : base + 8])[0],
            )

    @property
    def images_sorted(self) -> Generator[Tuple[bytes, int]]:
        """
        Same as images, but guaranteed to be sorted by increasing base address.
        """
        if self._images_sorted_by_address:
            # The images are naturally sorted by increasing base address.
            #
            # This should be true the _vast_ majority of the time, and perhaps
            # even all the time. Just connect the generators.
            yield from self.images
        else:
            # The images are sorted in some other order.
            #
            # This should be very rare, but we shouldn't fail if it happens.
            # Unlike the other cases in which we have to choose whether to fail
            # at or gracefully handle a weird condition, libmacho doesn't seem
            # to rely on this being the case.
            yield from sorted(self.images, key=lambda image: image[1])

    def is_address_in_shared_cache(self, addr: int) -> bool:
        """
        Whether the given address is in the shared cache.
        """
        return self.base <= addr < self.base + self.size

    def objc_builtin_selectors(self) -> DyldSharedCacheHashSet:
        """
        Looks up the hash table of builtin Objective-C selectors and returns it.

        Raises NotImplementedError for shared caches whose header is 0x1D8
        bytes long or shorter.
        """
        if self._header_size() > 0x1D8:
            # Use `objcOptsOffset` and the new Objective-C optimizations header
            # to find the address of the symbol hash set.
            objc_opt_offset = pwndbg.aglib.memory.u64(self.addr + 0x1D0)
            objc_opt_ptr = self.addr + objc_opt_offset

            offset = pwndbg.aglib.memory.u64(objc_opt_ptr + 0x18)
            ptr = self.addr + offset

            # Technically possible, but we have *no* idea what to do if this
            # happens, and it's more likely that we got something wrong.
            assert (
                offset != 0
            ), "Tried to query builtin selector identity, but have no Objective-C optimization header?"
        else:
            raise NotImplementedError(
                "Objective-C optimization queries are not yet supported for shared caches that have no objcOptsOffset value"
            )

        return DyldSharedCacheHashSet(ptr)
@pwndbg.lib.cache.cache_until("exit")
def shared_cache() -> DyldSharedCache | None:
    """
    Handle to the Darwin shared cache of the inferior, or None if the inferior
    reports a shared cache base address of zero.

    In Darwin, the way the Objective-C Runtime queries for this value is to call
    `_dyld_get_shared_cache_range` from libdyld[1], which then calls a routine
    that lives inside dyld itself, and that returns the values after poking into
    internal C++ structures.

    From our perspective, that kind of sucks. Calling routines from debuggers
    can be quite unreliable, and so ideally we'd always be peeking into the data
    structures directly. But, in this case, even for Apple these are considered
    entirely private to dyld[2], and so there's even less of a stability guarantee
    for the layout of these structures than normal.

    Because of this, a level of care must be taken before calling this function,
    as it must be assumed that the state of the inferior can be changed by it.

    [1]: https://github.com/apple-oss-distributions/objc4/blob/f126469408dc82bd3f327217ae678fd0e6e3b37c/runtime/objc-opt.mm#L434
    [2]: https://github.com/apple-oss-distributions/dyld/blob/main/doc/dyld4.md#libdylddylib
    """
    inferior = pwndbg.dbg.selected_inferior()

    # This calls into the inferior to have dyld report the cache base address.
    result = inferior.evaluate_expression("(const void*)_dyld_get_shared_cache_range()")
    base = int(result)

    return DyldSharedCache(base) if base != 0 else None

@ -0,0 +1,906 @@
"""
Apple Objective-C Runtime Support
This module implements support for analyzing the Apple Objective-C runtime. As
expected, Apple provides no official specification for the internal ABI of ObjC
and no guarantees of its stability, and so this module is not guaranteed to
work on all versions of Darwin.
"""
from __future__ import annotations
from typing import Callable
from typing import Generator
from typing import Generic
from typing import TypeVar
from typing_extensions import override
import pwndbg
import pwndbg.aglib.arch
import pwndbg.aglib.macho
import pwndbg.aglib.memory
import pwndbg.aglib.symbol
import pwndbg.aglib.typeinfo
# Element type of the generic list containers defined in this module.
T = TypeVar("T")
class _IdRaw:
"""
Pointer to an Objective-C object in the heap.
"""
def __init__(self, ptr: int):
self.addr = ptr
class _IdTagged:
"""
Tagged pointer to an Objective-C object.
This is a bit of a misnomer, as tagged pointers may not be pointers at all,
and the data for the entire object may be contained in the payload, with no
backing allocation in the heap. It is up to the class to determine how to
decode the payload properly.
"""
def __init__(self, tag: int, payload: int, extended: bool):
self.tag = tag
self.payload = payload
self.extended = extended
def lookup_class(self) -> Class:
"""
Looks up the class object matching the tag in this pointer.
"""
classes = _tagged_pointer_classes()
if self.extended:
classes += self.tag - 256
else:
classes += self.tag
ptr = pwndbg.aglib.memory.read_pointer_width(int(classes.address))
ptr = _ptrauth_strip(ptr)
return Class(ptr)
class _IsaPtr:
"""
Pointer to an `isa_t` structure.
"""
ISA_MASK = 0x0000000FFFFFFFF8
"Mask of bits containing just the authenticated class pointer."
def __init__(self, addr: int):
self._addr = addr
def _read(self) -> int:
"""
Read the bits of the `isa_t` structure into an integer.
"""
return pwndbg.aglib.memory.read_pointer_width(self._addr)
def get_class(self) -> Class:
ptr = self._read() & _IsaPtr.ISA_MASK
ptr = _ptrauth_strip(ptr)
return Class(ptr)
def _isa_class_mask() -> int:
    """
    Read the pointer-sized value stored at the `objc_debug_isa_class_mask`
    symbol.
    """
    symbol_addr = pwndbg.aglib.symbol.lookup_symbol_addr("objc_debug_isa_class_mask")
    return pwndbg.aglib.memory.read_pointer_width(symbol_addr)
class _ClassRoPtr:
RO_META = 0x1
RO_ROOT = 0x2
RO_HAS_CXX_STRUCTORS = 0x4
RO_HIDDEN = 0x10
RO_EXCEPTION = 0x20
RO_HAS_SWIFT_INITIALIZER = 0x40
RO_IS_ARC = 0x80
RO_HAS_CXX_DTOR_ONLY = 0x100
RO_HAS_WEAK_WITHOUT_ARC = 0x200
RO_FORBIDS_ASSOCIATED_OBJECTS = 0x400
RO_FROM_BUNDLE = 0x20000000
RO_FUTURE = 0x40000000
RO_REALIZED = 0x80000000
def __init__(self, addr: int):
self._ptr = addr
def name(self) -> bytes:
ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 24)
return pwndbg.aglib.memory.string(ptr)
def flags(self) -> int:
return pwndbg.aglib.memory.u32(self._ptr)
def methods(self) -> Generator[Method]:
ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 32)
if ptr & 1 == 0:
if ptr == 0:
return
yield from _MethodList(ptr).entries()
else:
if ptr & ~1 == 0:
# Not expected to happen, but better safe than sorry.
return
list_of_lists = _RelativeListOfLists(_MethodList, ptr & ~1)
for lst in list_of_lists.entries():
if lst is None:
continue
yield from lst.get_list().entries()
def ivars(self) -> Generator[InstanceVariable]:
ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 0x30)
if ptr != 0:
yield from _IVarList(ptr).entries()
def properties(self) -> Generator[ClassProperty]:
ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 0x40)
if ptr & 1 == 0:
if ptr == 0:
return
yield from _ClassPropertyList(ptr).entries()
else:
if ptr & ~1 == 0:
# Not expected to happen, but better safe than sorry.
return
list_of_lists = _RelativeListOfLists(_ClassPropertyList, ptr & ~1)
for lst in list_of_lists.entries():
if lst is None:
continue
yield from lst.get_list().entries()
class _ClassRwExtPtr:
def __init__(self, ptr: int):
self._ptr = ptr
def ro(self) -> _ClassRoPtr:
ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr)
ptr = _ptrauth_strip(ptr)
return _ClassRoPtr(ptr)
def methods(self) -> _ListArray[Method]:
return _ListArray(_MethodList, self._ptr + pwndbg.aglib.typeinfo.ptrsize)
def properties(self) -> _ListArray[ClassProperty]:
return _ListArray(_ClassPropertyList, self._ptr + 2 * pwndbg.aglib.typeinfo.ptrsize)
def demangled_name(self) -> bytes | None:
ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 4 * pwndbg.aglib.typeinfo.ptrsize)
if ptr == 0:
return None
return pwndbg.aglib.memory.string(ptr)
def version(self) -> int:
return pwndbg.aglib.memory.u32(self._ptr + 5 * pwndbg.aglib.typeinfo.ptrsize)
class _ClassRwPtr:
RW_REALIZED = 1 << 31
def __init__(self, ptr: int):
self._ptr = ptr
def ro_or_rw_ext(self) -> _ClassRoPtr | _ClassRwExtPtr:
ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 8)
if ptr & 1 == 1:
return _ClassRwExtPtr(ptr & ~1)
else:
return _ClassRoPtr(ptr)
class _ClassDataBitsPtr:
"""
Pointer to a `class_data_bits_t` structure.
"""
FAST_IS_RW_POINTER = 0x8000000000000000
FAST_IS_SWIFT_LEGACY = 0x1
FAST_IS_SWIFT_STABLE = 0x2
FAST_HAS_DEFAULT_RR = 0x4
FAST_DATA_MASK = 0x0F007FFFFFFFFFF8
def __init__(self, ptr: int):
self._ptr = ptr
def data(self) -> _ClassRoPtr | _ClassRwPtr:
if self._is_rw():
return _ClassRwPtr(self._data_addr())
return _ClassRoPtr(self._data_addr())
def _is_rw(self) -> bool:
return ((self._ptr & _ClassDataBitsPtr.FAST_IS_RW_POINTER) != 0) or (
(self._flags() & _ClassRwPtr.RW_REALIZED) != 0
)
def _data_addr(self) -> int:
return _ptrauth_strip(self._ptr) & _ClassDataBitsPtr.FAST_DATA_MASK
def _flags(self) -> int:
return pwndbg.aglib.memory.u32(self._data_addr())
class _EntList(Generic[T]):
    """
    Entity list.

    The list starts with a 32-bit `entsizeAndFlags` word and a 32-bit entry
    count, and its entries start 8 bytes in, spaced `entsize()` bytes apart.
    """

    # Mask for the flag bits of `entsizeAndFlags`
    _flags_mask: int = 0

    def __init__(self, ptr: int):
        self._addr = self._addr_from_ptr(ptr)
        self._ptr = ptr

    def _entsize_and_flags(self) -> int:
        return pwndbg.aglib.memory.u32(self._addr)

    def _entries(self) -> int:
        return pwndbg.aglib.memory.u32(self._addr + 4)

    def flags(self) -> int:
        packed = self._entsize_and_flags()
        return packed & self._flags_mask

    def entsize(self) -> int:
        packed = self._entsize_and_flags()
        return packed & ~self._flags_mask

    def _modify_pointer(self, ptr: int) -> int:
        # Hook applied to the address of every entry before it is handed to
        # `_from_ptr`. The base implementation leaves the address untouched.
        return ptr

    def _from_ptr(self, ptr: int) -> T:
        """
        Build the type of this list from a pointer.

        Must be implemented by the specialized class.
        """
        raise NotImplementedError()

    def _addr_from_ptr(self, ptr: int) -> int:
        """
        Strip any metadata from the pointer to this list.

        Must be implemented by the specialized class.
        """
        raise NotImplementedError()

    def __len__(self) -> int:
        return self._entries()

    def get(self, i: int) -> T:
        """
        Get the entry at index `i`, raising IndexError if it is past the end of
        the list.
        """
        if i >= len(self):
            raise IndexError(f"Index {i} is out-of-range for entlist with {len(self)} entries")

        entry_addr = self._addr + 8 + i * self.entsize()
        return self._from_ptr(self._modify_pointer(entry_addr))

    def entries(self) -> Generator[T]:
        """
        Generate all the entries of the list, in order.
        """
        yield from map(self.get, range(len(self)))
class _RelativeListOfListsEntry(Generic[T]):
    """
    A single entry of a relative list of lists.

    The entry is a 64-bit word, whose lowest 16 bits are an image index, and
    whose remaining bits are the signed offset from the entry to its list.
    """

    def __init__(self, ty: Callable[[int], _EntList[T]], ptr: int):
        self._ptr, self._ty = ptr, ty

    def image_index(self) -> int:
        word = pwndbg.aglib.memory.u64(self._ptr)
        return word & 0xFFFF

    def _list_offset(self) -> int:
        # Read as signed, so that the shift keeps the sign of the offset.
        word = pwndbg.aglib.memory.s64(self._ptr)
        return word >> 16

    def get_list(self) -> _EntList[T]:
        list_addr = self._ptr + self._list_offset()
        return self._ty(list_addr)
class _RelativeListOfLists(
    _EntList[_RelativeListOfListsEntry[T] | None],
    Generic[T],
):
    """
    An array of relative pointers to lists.

    Entries whose image is not marked as loaded are reported as None.

    This corresponds to the `relative_list_list_t` type in libobjc.
    """

    def __init__(self, ty: Callable[[int], _EntList[T]], ptr: int):
        super().__init__(ptr)
        self._ty = ty

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        # Top-Byte-Ignore is assumed for method lists, but method list pointers
        # may have metadata attached to them.
        top_byte = 0xFF00000000000000
        return ptr & ~top_byte

    @override
    def _from_ptr(self, ptr: int) -> _RelativeListOfListsEntry[T] | None:
        candidate = _RelativeListOfListsEntry(self._ty, ptr)

        # The entry is only valid if its corresponding image has been marked
        # as loaded in `objc_debug_headerInfoRWs`.
        if _header_info_rw_is_image_loaded(candidate.image_index()):
            return candidate
        return None
class _ListArray(Generic[T]):
    """
    A runtime-polymorphic array type for lists. May be a pointer to a list type,
    an array of pointers, or a _RelativeListOfLists, distinguished by a tag in
    a pointer.

    Strangely for Apple, the tagged pointer to the final list is contained
    inside the list array structure, rather than having the whole structure be
    inlined into a pointer value. Suspiciously sane.

    This corresponds to the `list_array_tt` type in libobjc.
    """

    def __init__(self, ty: Callable[[int], _EntList[T]], ptr: int):
        self._ptr = ptr
        self._ty = ty

    def entries(self) -> Generator[T]:
        """
        Generate the entries of all the lists in the array, in order.
        """
        raw_ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr)

        # The two lowest bits of the pointer select its interpretation.
        tag = raw_ptr & 3
        ptr = raw_ptr & ~3

        if ptr == 0:
            return

        if tag == 0:
            # This is just a pointer to the list.
            yield from self._ty(ptr).entries()
        elif tag == 1:
            # This is an array of lists: a 32-bit count, with the pointers to
            # the lists starting 8 bytes in.
            count = pwndbg.aglib.memory.u32(ptr)
            for i in range(count):
                yield from self._ty(
                    pwndbg.aglib.memory.read_pointer_width(
                        ptr + 8 + i * pwndbg.aglib.typeinfo.ptrsize
                    )
                ).entries()
        elif tag == 2:
            # This is a relative list of lists.
            for ll in _RelativeListOfLists(self._ty, ptr).entries():
                if ll is None:
                    # Entries of images that are not loaded are reported as
                    # None, and have no list to read from.
                    continue
                yield from ll.get_list().entries()
def _header_info_rw_is_image_loaded(index: int) -> bool:
    """
    Queries `objc_debug_headerInfoRWs` and checks whether the image with the
    given index is loaded.

    Raises IndexError if the structure has no entry for the given index.
    """
    symbol_addr = pwndbg.aglib.symbol.lookup_symbol_addr("objc_debug_headerInfoRWs")
    table = pwndbg.aglib.memory.read_pointer_width(symbol_addr)

    # The structure starts with a 32-bit entry count and a 32-bit entry size,
    # and its entries start 8 bytes in.
    count = pwndbg.aglib.memory.u32(table)
    entsize = pwndbg.aglib.memory.u32(table + 4)

    if index >= count:
        raise IndexError(
            f"Image index {index} is out-of-bounds for headerInfoRWs structure with {count} entries"
        )

    # The lowest bit of the entry is the loaded flag.
    entry = pwndbg.aglib.memory.read_pointer_width(table + 8 + entsize * index)
    return (entry & 1) == 1
def _tagged_pointer_classes() -> pwndbg.dbg_mod.Value:
    """
    The Objective-C runtime tagged pointer class list.

    The classes to which the tag values in a tagged pointer correspond are not
    fixed, and are instead stored in a runtime-global array that gets looked up
    when a message is sent.

    The `objc_debug_taggedpointer_classes` symbol is returned cast to a
    `void **`.
    """
    symbol = pwndbg.aglib.symbol.lookup_symbol("objc_debug_taggedpointer_classes")
    void_ptr_ptr = pwndbg.aglib.typeinfo.void.pointer().pointer()
    return symbol.cast(void_ptr_ptr)
def _ptr_obfuscation_value() -> int:
    """
    The value the Objective-C runtime obfuscates tagged pointer values with.

    Read as a pointer-sized value from the
    `objc_debug_taggedpointer_obfuscator` symbol.
    """
    symbol_addr = pwndbg.aglib.symbol.lookup_symbol_addr("objc_debug_taggedpointer_obfuscator")
    return pwndbg.aglib.memory.read_pointer_width(symbol_addr)
def _try_decode_tagged_split(ptr: int) -> _IdTagged | None:
"""
Decodes a tagged pointer encoded in the split-tag scheme, if it is tagged.
This is the encoding scheme used in modern - iOS 14 and newer - ARM64
platforms.
If the pointer is not tagged, returns `None`.
"""
if ptr & 0x8000000000000000 == 0:
# Not a tagged pointer.
return None
if ptr & 7 == 7:
# This is an extended tag with a 52-bit payload.
tag = (ptr >> 55) & 0xFF
payload = (ptr >> 3) & 0xFFFFFFFFFFFFF
extended = True
else:
# This is a short tag with a 60-bit payload.
tag = ptr & 7
payload = (ptr >> 3) & 0xFFFFFFFFFFFFFFF
extended = False
return _IdTagged(tag, payload, extended)
def _try_decode_tagged_lsb(ptr: int) -> _IdTagged:
"""
Decodes a tagged pointer encoded in the LSB-tag scheme, if it is tagged.
This is the encoding scheme used in all x86-64 versions of Darwin.
If the pointer is not tagged, returns `None`.
"""
if ptr & 1 == 0:
# Not a tagged pointer.
return None
if ptr & 14 == 14:
# This is an extended tag with a 52-bit payload.
tag = (ptr >> 4) & 0xFF
payload = ptr >> 12
extended = True
else:
# This is a short tage with a 60-bit payload.
tag = (ptr >> 1) & 7
payload = ptr >> 4
extended = False
return _IdTagged(tag, payload, extended)
def _decode_prog_id(ptr: int) -> _IdRaw | _IdTagged:
    """
    Given an Objective-C object pointer, as seen in the program, decode it.

    Raises AssertionError on architectures other than aarch64 and x86-64.
    """
    # First, check for tagged pointers, using the scheme of the architecture.
    arch = pwndbg.aglib.arch.name
    if arch == "aarch64":
        tagged = _try_decode_tagged_split(ptr)
    elif arch == "x86-64":
        tagged = _try_decode_tagged_lsb(ptr)
    else:
        raise AssertionError(f"Unexpected Objective-C architecture: {arch}")

    # Anything that did not decode as a tagged pointer is a direct pointer.
    return tagged if tagged is not None else _IdRaw(ptr)
def _ptrauth_strip(ptr: int) -> int:
"""
Strip pointer signing information from a given signed pointer.
"""
return ptr & 0xFFFFFFFFFFFF
class Object:
    """
    An Objective-C object, identified by its pointer value in the program.
    """

    # Object pointer value, as seen in the program. May be tagged, obfuscated, authenticated.
    _addr: int

    # Decoded object pointer value. May be tagged.
    _id: _IdRaw | _IdTagged

    def __init__(self, addr: int):
        self._addr = addr
        self._id = _decode_prog_id(addr)

    @property
    def cls(self) -> Class | None:
        """
        The class of this object.

        Read through the `isa` of the object for direct pointers, and looked up
        from the tag for tagged pointers.
        """
        ident = self._id
        if isinstance(ident, _IdRaw):
            return _IsaPtr(ident.addr).get_class()
        if isinstance(ident, _IdTagged):
            return ident.lookup_class()
        return None
class Class(Object):
    """
    An Objective-C class.

    Classes are objects themselves, but their pointers are never tagged.
    """

    def __init__(self, addr: int):
        super().__init__(addr)
        assert isinstance(self._id, _IdRaw), "Class pointers are never tagged"

    def _data_bits(self) -> _ClassDataBitsPtr:
        """
        Read the class data bits pointer, stored 32 bytes into the class, with
        its signing information stripped.
        """
        # MyPy fails if we don't check this a second time.
        assert isinstance(self._id, _IdRaw), "Class pointers are never tagged"

        ptr = pwndbg.aglib.memory.read_pointer_width(self._id.addr + 32)
        ptr = _ptrauth_strip(ptr)
        return _ClassDataBitsPtr(ptr)

    def _ro(self) -> _ClassRoPtr:
        """
        Resolve the read-only data of this class.

        The class data bits may point at the read-only data directly, or at the
        read-write data, which in turn leads to the read-only data either
        directly or through its extension.

        Raises `AssertionError` if the data is of none of the expected kinds.
        """
        data = self._data_bits().data()
        if isinstance(data, _ClassRoPtr):
            return data

        if isinstance(data, _ClassRwPtr):
            ro_or_rw_ext = data.ro_or_rw_ext()
            if isinstance(ro_or_rw_ext, _ClassRwExtPtr):
                return ro_or_rw_ext.ro()
            if isinstance(ro_or_rw_ext, _ClassRoPtr):
                return ro_or_rw_ext

        # Raised explicitly, rather than with `assert False`, so that this is
        # not silently turned into a `None` return when running under `-O`.
        #
        # FIXME: Should be `typing.assert_never`, needs Python 3.11
        raise AssertionError("Unexpected kind of Objective-C class data")

    def _rw_ext(self) -> _ClassRwExtPtr | None:
        """
        Resolve the read-write extension data of this class, if it has any.

        Returns `None` when the class data bits lead straight to the read-only
        data, with no extension in between.

        Raises `AssertionError` if the data is of none of the expected kinds.
        """
        data = self._data_bits().data()
        if isinstance(data, _ClassRoPtr):
            return None

        if isinstance(data, _ClassRwPtr):
            ro_or_rw_ext = data.ro_or_rw_ext()
            if isinstance(ro_or_rw_ext, _ClassRwExtPtr):
                return ro_or_rw_ext
            if isinstance(ro_or_rw_ext, _ClassRoPtr):
                return None

        # Raised explicitly, rather than with `assert False`, so that this is
        # not silently turned into a `None` return when running under `-O`.
        #
        # FIXME: Should be `typing.assert_never`, needs Python 3.11
        raise AssertionError("Unexpected kind of Objective-C class data")

    @property
    def superclass(self) -> Class | None:
        """
        The superclass of this class, or `None` if this is a root class.
        """
        # MyPy fails if we don't check this a second time.
        assert isinstance(self._id, _IdRaw), "Class pointers are never tagged"

        if self._ro().flags() & _ClassRoPtr.RO_ROOT != 0:
            # This is a root class, and thus has no superclass.
            return None

        # The superclass pointer is stored one pointer into the class.
        ptr_addr = self._id.addr + pwndbg.aglib.typeinfo.ptrsize
        ptr = pwndbg.aglib.memory.read_pointer_width(ptr_addr)
        ptr = _ptrauth_strip(ptr)
        return Class(ptr)

    @property
    def name(self) -> bytes:
        """
        The name of this class, as stored in its read-only data.
        """
        return self._ro().name()

    @property
    def methods(self) -> Generator[Method]:
        """
        The methods of this class.
        """
        if (rw_ext := self._rw_ext()) is not None:
            # Return the methods added to the class at runtime from the Class
            # R/W structure, which also include the base methods.
            yield from rw_ext.methods().entries()
        else:
            # Return the base methods.
            yield from self._ro().methods()

    @property
    def ivars(self) -> Generator[InstanceVariable]:
        """
        The instance variables of this class, as stored in its read-only data.
        """
        yield from self._ro().ivars()

    @property
    def properties(self) -> Generator[ClassProperty]:
        """
        The properties of this class.
        """
        if (rw_ext := self._rw_ext()) is not None:
            # Return the properties added to the class at runtime from the Class
            # R/W structure, which also include the base properties.
            yield from rw_ext.properties().entries()
        else:
            # Return the base properties.
            yield from self._ro().properties()

    @property
    def is_metaclass(self) -> bool:
        """
        Whether this class is a metaclass.
        """
        return (self._ro().flags() & _ClassRoPtr.RO_META) != 0

    @override
    @property
    def cls(self) -> Class | None:
        """
        The class of this class, or `None` if this class is a metaclass.
        """
        if self.is_metaclass:
            # Following this pointer in metaclasses is weird. Users are better
            # served following the superclass chain, instead.
            return None
        return super().cls
class InstanceVariable:
    """
    An Objective-C Instance Variable.

    Instance Variables are NOT objects!
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def offset(self) -> int:
        """
        The offset in bytes of this value from the start of the object instance.
        """
        # The first field is a pointer to the offset, not the offset itself.
        offset_addr = pwndbg.aglib.memory.read_pointer_width(self._ptr)
        return pwndbg.aglib.memory.s32(offset_addr)

    @property
    def name(self) -> bytes:
        """
        The name of this instance variable.
        """
        name_field = self._ptr + pwndbg.aglib.typeinfo.ptrsize
        name_addr = pwndbg.aglib.memory.read_pointer_width(name_field)
        return pwndbg.aglib.memory.string(name_addr)

    @property
    def typename(self) -> bytes:
        """
        The name of the type of this instance variable.
        """
        type_field = self._ptr + pwndbg.aglib.typeinfo.ptrsize * 2
        type_addr = pwndbg.aglib.memory.read_pointer_width(type_field)
        return pwndbg.aglib.memory.string(type_addr)

    @property
    def alignment(self) -> int:
        """
        The alignment of this instance variable, in bytes.
        """
        # The alignment is stored as the base two logarithm of its value.
        raw_field = self._ptr + pwndbg.aglib.typeinfo.ptrsize * 3
        align_log2 = pwndbg.aglib.memory.u32(raw_field)

        if align_log2 != 0xFFFFFFFF:
            return 1 << align_log2

        # All ones indicates the natural alignment of a pointer.
        return pwndbg.aglib.typeinfo.ptrsize

    @property
    def size(self) -> int:
        """
        The size of this instance variable, in bytes.
        """
        # The size is the 32-bit value right after the 32-bit alignment.
        size_field = self._ptr + pwndbg.aglib.typeinfo.ptrsize * 3 + 4
        return pwndbg.aglib.memory.u32(size_field)
class ClassProperty:
    """
    An Objective-C class property, made up of two string pointers: one for its
    name, followed by one for its value.
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def name(self) -> bytes:
        """
        The name of this class property.
        """
        name_addr = pwndbg.aglib.memory.read_pointer_width(self._ptr)
        return pwndbg.aglib.memory.string(name_addr)

    @property
    def value(self) -> bytes:
        """
        The value of this property.
        """
        # The value string pointer comes right after the name string pointer.
        value_field = self._ptr + pwndbg.aglib.typeinfo.ptrsize
        value_addr = pwndbg.aglib.memory.read_pointer_width(value_field)
        return pwndbg.aglib.memory.string(value_addr)
class Selector:
    """
    An Objective-C Selector.

    Selectors are NOT objects!
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def name(self) -> bytes:
        """
        Retrieves the name of this selector.
        """
        # Selectors in the Apple Objective-C runtime are human-readable strings
        # that have unique identities[1]. That identity is nothing more than
        # the pointer to the string, which the tooling guarantees to be unique.
        # Reading the name of a selector is, then, just a matter of following
        # its identity pointer.
        #
        # [1]: https://web.archive.org/web/20161010081824/http://unixjunkie.blogspot.com/2006/02/nil-and-nil.html
        identity = self._ptr
        return pwndbg.aglib.memory.string(identity)
class Method:
    """
    An Objective-C Method Pointer.

    Methods are NOT objects!

    A method pointer can be one of three types: Small, small direct, and big.
    Pointer types are distinguished by the two least significant bits in the
    integer representation of the pointer. A value of `1` is used for both small
    pointer types, while all other values are used to distinguish between the
    signing nuances of big pointers.

    Small pointers are 32-bit wide and relative to a given base value. Big
    pointers contain the pointers themselves, and they may or may not be signed.

    Small direct pointers are small pointers that reside in the shared cache, and
    their selectors are relative to @selector(🤯), while the selectors of regular
    small pointers are relative to the pointers themselves.

    In both layouts, the fields are, in order: the selector, the types string,
    and the implementation. Small fields are 4 bytes wide, while big fields are
    as wide as a pointer.
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def sel(self) -> Selector:
        "The selector this method responds to."
        kind = self._ptr & 3
        base = self._ptr & ~3

        if kind != 1:
            # Big pointer. The selector is the first pointer-sized field.
            return Selector(_ptrauth_strip(pwndbg.aglib.memory.read_pointer_width(base)))

        if pwndbg.aglib.macho.shared_cache().is_address_in_shared_cache(base):
            # To resolve selectors of small method pointers in the shared cache,
            # we have to look up the identity of @selector(🤯).
            rel = (
                pwndbg.aglib.macho.shared_cache()
                .objc_builtin_selectors()
                .lookup("🤯".encode("utf-8"))
            )
            ptr = rel + pwndbg.aglib.memory.s32(base)
        else:
            offset = pwndbg.aglib.memory.s32(base)
            ref = base + offset

            # Non-shared cache values are pointers to selectors.
            ptr = pwndbg.aglib.memory.read_pointer_width(ref)

        return Selector(ptr)

    @property
    def types(self) -> bytes:
        "The types of the arguments to this method."
        kind = self._ptr & 3
        base = self._ptr & ~3

        if kind == 1:
            # Small pointer. The second field is a 32-bit offset, relative to
            # the address of the field itself.
            field = base + 4
            addr = field + pwndbg.aglib.memory.s32(field)
        else:
            # Big pointer. The second field is the pointer to the string.
            addr = _ptrauth_strip(
                pwndbg.aglib.memory.read_pointer_width(base + pwndbg.aglib.typeinfo.ptrsize)
            )

        return pwndbg.aglib.memory.string(addr)

    @property
    def imp(self) -> int:
        "The pointer to the function that implements this method."
        kind = self._ptr & 3
        base = self._ptr & ~3

        if kind == 1:
            # There's a bit of nuance here.
            #
            # Method swizzling for small pointers is implemented using a global
            # hash map of method pointers to implementation pointers. When
            # getting the IMP pointer for a small pointer, the runtime will
            # first check the global hash map to see if the method has been
            # swizzled, and return the swizzled method if it has. The runtime
            # will do what we do here if the method has not been swizzled.
            #
            # Currently, we have no good way to query this map, and no other way
            # to detect that a method has been swizzled, so swizzles to small
            # pointers are unfortunately completely invisible to us.
            #
            # TODO: Handle method swizzles for small-pointer-type Objective-C methods.
            field = base + 8
            return field + pwndbg.aglib.memory.s32(field)

        # Big pointer. The implementation is the third pointer-sized field,
        # coming after the selector and the types string. Reading the second
        # field here would return the pointer to the types string instead.
        return _ptrauth_strip(
            pwndbg.aglib.memory.read_pointer_width(base + pwndbg.aglib.typeinfo.ptrsize * 2)
        )
class _MethodList(_EntList[Method]):
    """
    Method entity list.
    """

    _flags_mask = 0xFFFF0003

    SMALL_METHOD_LIST_FLAG = 1 << 31
    "Indicates that the pointers in this list are small method pointers."

    BIG_SIGNED_METHOD_LIST_FLAG = 1 << 63
    """
    Indicates that the pointers in this list are big and signed.

    Stored as part of the pointer to the method list, rather than in the flags
    field, as is the case with other flags.
    """

    @override
    def _modify_pointer(self, ptr: int) -> int:
        # The kind of method pointer is encoded in the two lowest bits, so
        # start by clearing them.
        untagged = ptr & ~3

        if self.flags() & self.SMALL_METHOD_LIST_FLAG:
            # Small pointers are marked in the flags field of the list.
            return untagged | 1

        if self._ptr & self.BIG_SIGNED_METHOD_LIST_FLAG:
            # Big signed pointers are marked in the pointer to the list.
            return untagged | 2

        # Neither a tag nor a flag. These are plain big pointers.
        return untagged

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        # Method list pointers may carry metadata in their top byte, which is
        # assumed to be ignored (Top-Byte-Ignore), so it gets cleared here.
        top_byte = 0xFF << 56
        return ptr & ~top_byte

    @override
    def _from_ptr(self, ptr: int) -> Method:
        return Method(ptr)
class _IVarList(_EntList[InstanceVariable]):
    """
    IVar entity list.

    Pointers in this list need no adjustment: they carry no flags, and are used
    as addresses exactly as they are.
    """

    _flags_mask = 0

    @override
    def _from_ptr(self, ptr: int) -> InstanceVariable:
        return InstanceVariable(ptr)

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        return ptr

    @override
    def _modify_pointer(self, ptr: int) -> int:
        return ptr
class _ClassPropertyList(_EntList[ClassProperty]):
    """
    Class property entity list.

    Pointers in this list need no adjustment: they carry no flags, and are used
    as addresses exactly as they are.
    """

    _flags_mask = 0

    @override
    def _from_ptr(self, ptr: int) -> ClassProperty:
        return ClassProperty(ptr)

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        return ptr

    @override
    def _modify_pointer(self, ptr: int) -> int:
        return ptr
Loading…
Cancel
Save