mirror of https://github.com/pwndbg/pwndbg.git
Add initial support for Objective-C and Mach-O on Darwin (#3249)
* Initial Apple Objective-C ABI support * Add support for instance variables and class properties * Add support for read-write object properties * Add trie parsing and image listing support for Mach-O * Add sorted iterator to DYLD Shared Cache image listing * Address feedbackpull/3258/head
parent
260a7204a7
commit
fa566efa1c
@ -0,0 +1,668 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import struct
|
||||
from typing import Callable
|
||||
from typing import Generator
|
||||
from typing import Generic
|
||||
from typing import Tuple
|
||||
from typing import TypeVar
|
||||
|
||||
import pwndbg
|
||||
import pwndbg.aglib.memory
|
||||
|
||||
|
||||
def _uleb128(ptr: int) -> Tuple[int, int]:
    """
    Decode the ULEB128-encoded integer stored at address `ptr`.

    Returns a `(value, length)` tuple, where `value` is the decoded number and
    `length` is the number of bytes its encoding occupies in memory.
    """
    value = 0
    length = 0
    more = True
    while more:
        byte = pwndbg.aglib.memory.u8(ptr + length)

        # Every byte carries seven payload bits, least significant group first.
        value |= (byte & 0x7F) << (7 * length)

        # A set high bit means another byte follows; a clear one terminates.
        more = (byte & 0x80) != 0
        length += 1

    return value, length
|
||||
|
||||
|
||||
class _RawTrie:
    """
    Untyped core of `Trie`.

    Walks a serialized prefix tree that lives in the memory of the inferior and
    reports where the data attached to each node is, without interpreting it.
    """

    def __init__(self, ptr: int):
        # Address of the root node. Child offsets are relative to this address.
        self._ptr = ptr

    def _walk(
        self,
        offset: int,
        acc: bytes,
        edgesel: Callable[[bytes, bytes], bool],
        nodesel: Callable[[bytes], bool],
    ) -> Generator[Tuple[bytes, int, int]]:
        """
        Walk the trie, depth first, starting at the node found at `offset`.

        `acc` is the name accumulated on the way to this node. The two
        callables steer the walk:

        - `edgesel(acc, edge_name)` is called for every outgoing edge. The
          subtree behind the edge is explored only when it returns True.
        - `nodesel(acc)` is called for every node that carries data. The node
          is yielded only when it returns True.

        Yielding a node does not end the walk by itself; its children are still
        visited if the consumer keeps iterating.

        Each yielded item is a `(name, ptr, length)` tuple: the name of the
        node, the address of the first byte of its data, and the size of that
        data in bytes.
        """
        node = self._ptr + offset

        # A node starts with the ULEB128 size of its data, then the data.
        payload_len, header_len = _uleb128(node)
        payload = node + header_len
        if payload_len != 0 and nodesel(acc):
            yield acc, payload, payload_len

        # The child count that follows is a single byte, NOT a ULEB128.
        cursor = payload + payload_len
        remaining = pwndbg.aglib.memory.u8(cursor)
        cursor += 1

        while remaining > 0:
            remaining -= 1

            # Every edge is a NUL-terminated label followed by the ULEB128
            # offset of the child node.
            label = pwndbg.aglib.memory.string(cursor)
            cursor += len(label) + 1

            target, target_len = _uleb128(cursor)
            cursor += target_len

            # The cursor already sits on the next edge, so skipping is free.
            if not edgesel(acc, label):
                continue

            yield from self._walk(target, acc + label, edgesel, nodesel)

    def _get_raw(self, name: bytes) -> Tuple[bytes, int, int] | None:
        """
        Find the node called `name` and return its `(name, ptr, length)` tuple,
        or None when the trie has no such node.
        """

        def is_target(path: bytes) -> bool:
            return path == name

        def leads_to_target(path: bytes, label: bytes) -> bool:
            # Only follow edges that spell out the next part of the name.
            remainder = name[len(path) :]
            return remainder.startswith(label)

        for hit in self._walk(0, b"", leads_to_target, is_target):
            return hit

        return None

    def _entries_raw(self) -> Generator[Tuple[bytes, int, int]]:
        """
        Yield the `(name, ptr, length)` tuple of every node that carries data.
        """

        def any_edge(_path: bytes, _label: bytes) -> bool:
            return True

        def any_node(_path: bytes) -> bool:
            return True

        yield from self._walk(0, b"", any_edge, any_node)

    def keys(self) -> Generator[bytes]:
        """
        Yield the name of every node that carries data.
        """
        for name, _ptr, _size in self._entries_raw():
            yield name
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class Trie(_RawTrie, Generic[T]):
    """
    Prefix Tree

    The Mach-O format makes extensive use of prefix trees for any operation that
    involves string-based lookup.

    `ty` is the type function of the trie: it is called with the address and
    the size, in bytes, of the data attached to a node, and returns the decoded
    value for that node.
    """

    def __init__(self, ptr: int, ty: Callable[[int, int], T]):
        super().__init__(ptr)
        self._ty = ty

    def get(self, name: bytes) -> T | None:
        """
        Get the data associated with the node of given name, if it exists.

        Returns None when the trie has no node with that name.
        """
        found = self._get_raw(name)
        if found is None:
            # `_get_raw` signals a miss with None; it must not be unpacked.
            return None

        _, ptr, size = found
        return self._ty(ptr, size)

    def entries(self) -> Generator[Tuple[bytes, T]]:
        """
        List all the entries in the trie, along with their associated data.
        """
        for name, ptr, size in self._entries_raw():
            yield name, self._ty(ptr, size)
|
||||
|
||||
|
||||
def _uleb128_ty(ptr: int, size: int) -> int:
    """
    Type function for `Trie` nodes whose data is a single ULEB128 number.

    Decodes the number at `ptr` and checks that its encoding is exactly `size`
    bytes long.
    """
    decoded, consumed = _uleb128(ptr)

    # A mismatch means the node holds a different type or the trie is corrupt.
    assert consumed == size, "Size mismatch while validating ULEB128"

    return decoded
|
||||
|
||||
|
||||
class DyldSharedCacheMapping:
    """
    Plain record describing one memory mapping of the DyLD Shared Cache.
    """

    def __init__(self, addr: int, size: int, file_offset: int, max_prot: int, init_prot: int):
        # Location of the mapping: address, size and offset in the cache file.
        self.addr, self.size, self.file_offset = addr, size, file_offset

        # Maximum and initial protection values of the mapping.
        self.max_prot, self.init_prot = max_prot, init_prot
|
||||
|
||||
|
||||
def _lookup8(blob: bytes, level: int) -> int:
    """
    Hashes a variable-length byte array into a 64-bit integer.

    Apple uses a variation of an algorithm published by Bob Jenkins in 1997 on
    Dr. Dobb's Journal, and later republished on their website under the title
    "The Hash"[1]. The version used by Apple was also written by Jenkins[2], but
    does not seem to be mentioned in any of their articles, so I couldn't
    gather much information about it besides that it looks like a 64-bit variant
    of the algorithm in the article.

    This function is a direct Python port of the algorithm in [2]. `level` is
    the seed of the hash (the previous hash, or an arbitrary value).

    [1]: https://burtleburtle.net/bob/hash/doobs.html
    [2]: https://burtleburtle.net/bob/c/lookup8.c
    """
    mask = 0xFFFFFFFFFFFFFFFF

    data = bytes(blob)
    length = len(data)

    a = level
    b = level
    c = 0x9E3779B97F4A7C13

    # Consume every complete 24-byte block as three little-endian 64-bit words.
    pos = 0
    while length - pos >= 24:
        a = (a + int.from_bytes(data[pos : pos + 8], "little")) & mask
        b = (b + int.from_bytes(data[pos + 8 : pos + 16], "little")) & mask
        c = (c + int.from_bytes(data[pos + 16 : pos + 24], "little")) & mask

        a, b, c = _mix64(a, b, c)
        pos += 24

    # Fold in the remaining 0 to 23 bytes. This final round always runs, even
    # when there are no bytes left. The lowest byte of `c` is reserved for the
    # total length, so the tail bytes that land in `c` are shifted up by one
    # byte.
    tail = data[pos:]
    a = (a + int.from_bytes(tail[:8], "little")) & mask
    b = (b + int.from_bytes(tail[8:16], "little")) & mask

    # Reduce `c` to 64 bits *before* mixing: `_mix64` starts by shifting `c` to
    # the right, so an unreduced carry would leak into the result.
    c = (c + length + (int.from_bytes(tail[16:], "little") << 8)) & mask

    a, b, c = _mix64(a, b, c)

    return c
|
||||
|
||||
|
||||
def _mix64(a: int, b: int, c: int) -> tuple[int, int, int]:
    """
    Mix 3 64-bit values reversibly.

    This function is part of the Python port of Bob Jenkins' hash algorithm, as
    detailed in `_lookup8`. Every intermediate value is truncated to 64 bits, to
    match unsigned 64-bit arithmetic.
    """
    mask = 0xFFFFFFFFFFFFFFFF

    # One row per round: the right shift applied to `c` when updating `a`, the
    # left shift applied to `a` when updating `b`, and the right shift applied
    # to `b` when updating `c`.
    rounds = (
        (43, 9, 8),
        (38, 23, 5),
        (35, 49, 11),
        (12, 18, 22),
    )

    for c_shift, a_shift, b_shift in rounds:
        a = ((a - b - c) ^ (c >> c_shift)) & mask
        b = ((b - c - a) ^ (a << a_shift)) & mask
        c = ((c - a - b) ^ (b >> b_shift)) & mask

    return a, b, c
|
||||
|
||||
|
||||
class DyldSharedCacheHashSet:
    """
    A hash set from the DyLD Shared Cache.

    The DyLD Shared Cache uses hash sets in all structures related to Objective-C
    Optimization. This class is an interface to them.
    """

    def __init__(self, ptr: int):
        memory = pwndbg.aglib.memory
        self._ptr = ptr

        # Fixed-size fields of the header.
        self.capacity = memory.u32(ptr + 0x04)
        self.shift = memory.u32(ptr + 0x0C)
        self.mask = memory.u32(ptr + 0x10)
        self.salt = memory.u64(ptr + 0x18)

        # Mask must always be one minus a power of two. If this fails, it hints
        # that we loaded from an invalid address.
        tab_len = self.mask + 1
        assert (tab_len & self.mask) == 0

        # The dynamically-sized portion of the structure starts at 0x420 with
        # the tab array, followed by the check bytes and then the offsets.
        self._checkbytes_offset = 0x420 + tab_len
        self._offsets_offset = self._checkbytes_offset + self.capacity

        # Preload the scramble and tab lists, to save on LLDB calls later on.
        self._scramble = memory.read(ptr + 0x20, 0x400)
        self._tab = memory.read(ptr + 0x420, tab_len)

        # It is possible that the offsets array is not aligned. The code in
        # libmacho does not seem to care about this condition, but we should
        # probably watch out if it ever does arise in a real-world scenario.
        assert self._offsets_offset % 4 == 0, "Unaligned offset array in Mach-O perfect hash map"

    def _offset_at(self, index: int) -> int:
        """
        Read the signed 32-bit entry number `index` of the offsets array.
        """
        return pwndbg.aglib.memory.s32(self._ptr + self._offsets_offset + index * 4)

    def _index_of(self, key: bytes) -> int:
        """
        Compute the slot of the offsets array that `key` hashes to.
        """
        hashed = _lookup8(key, self.salt)

        # The low bits select a tab entry, which in turn selects one of the
        # little-endian 32-bit scramble words.
        selector = self._tab[hashed & self.mask]
        (scramble,) = struct.unpack_from("<I", self._scramble, selector * 4)

        return ((hashed >> self.shift) % 0x100000000) ^ scramble

    def lookup(self, key: bytes) -> int | None:
        """
        Look up the given key in the hash set.

        Returns a pointer to the key if it is present, None otherwise.
        """
        # In libmacho, Apple uses the checkbytes as a way to quickly reject
        # elements that are not in the list without having to compare the keys,
        # but we currently have no need for that optimization.
        offset = self._offset_at(self._index_of(key))
        if offset == 0:
            return None

        # The slot may hold a different key, so compare the stored string.
        candidate = self._ptr + offset
        if pwndbg.aglib.memory.string(candidate) != key:
            return None

        return candidate

    def keys(self) -> Generator[bytes]:
        """
        Returns an iterator over all the keys present in the hash set.
        """
        for slot in range(self.capacity):
            offset = self._offset_at(slot)

            # A zero offset marks an unused slot.
            if offset != 0:
                yield pwndbg.aglib.memory.string(self._ptr + offset)
|
||||
|
||||
|
||||
class DyldSharedCache:
    """
    Handle to the DyLD Shared Cache in the address space of the inferior.

    The shared cache format handling code in libmacho has multiple paths for
    gathering the same information, depending on a value that is near the
    beginning of the header, which indicates that the format has likely evolved
    quite a bit since its first introduction.

    The way the version of a given shared cache is determined isn't exactly
    straightforward, and relies on a combination of the `magic` and
    `mappingOffset` values. Fortunately for us, however, when `mappingOffset` is
    used for this purpose, it follows the fairly widely used pattern of using
    the size of the struct to denote its version.
    """

    def __init__(self, addr: int):
        self.addr = addr

        # Preload a few values, to speed things up later. The location of the
        # image array offset and count depends on the header size.
        images_offset = 0x18 if self._header_size() <= 0x1C4 else 0x1C0
        self._images_base = self.addr + pwndbg.aglib.memory.u32(self.addr + images_offset)
        self.image_count = pwndbg.aglib.memory.u32(self.addr + images_offset + 4)

        # Check whether the images are sorted by loading address.
        self._images_sorted_by_address = all(
            a[1] <= b[1] for a, b in itertools.pairwise(self.images)
        )

    def _header_size(self) -> int:
        """
        The length of the shared cache header, in bytes.
        """
        # Read `mappingOffset` (+0x10) from the structure.
        return pwndbg.aglib.memory.u32(self.addr + 16)

    def mappings(self) -> Generator[DyldSharedCacheMapping]:
        """
        Generate the list of memory mappings in the shared cache.
        """
        memory = pwndbg.aglib.memory

        header_size = self._header_size()
        if header_size <= 0x138:
            # This header predates `mappingWithSlideOffset` (+0x138), so use the
            # regular `mappingOffset` value and regular mapping structures. Read
            # the number of mapping structures from `mappingCount` (+0x14).
            base = self.addr + header_size
            count = memory.u32(self.addr + 0x14)
            stride = 0x20
            prot_offset = 24
        else:
            # We can use `mappingWithSlideOffset` (+0x138) and mapping with
            # slide structures for the mappings. Read the number of mapping
            # structures from `mappingWithSlideCount` (+0x13c).
            base = self.addr + memory.u32(self.addr + 0x138)
            count = memory.u32(self.addr + 0x13C)
            stride = 0x38
            prot_offset = 48

        # Both structures start with the address, size and file offset; they
        # only differ in their size and in where the protections are stored.
        for i in range(count):
            entry = base + i * stride
            yield DyldSharedCacheMapping(
                memory.u64(entry),
                memory.u64(entry + 8),
                memory.u64(entry + 16),
                memory.u32(entry + prot_offset),
                memory.u32(entry + prot_offset + 4),
            )

    @property
    def base(self) -> int:
        """
        The base virtual address of the DyLD Shared Cache.
        """
        return self.addr

    @property
    def size(self) -> int:
        """
        The mapped size, in bytes, of the DyLD Shared Cache.
        """
        if self._header_size() >= 0x18C:
            # Use `sharedRegionSize` (+0xe8) as the size of the entire shared
            # region.
            return pwndbg.aglib.memory.u64(self.addr + 0xE8)

        # Find the smallest region that covers all the mappings as the size.
        start = None
        end = None
        for mapping in self.mappings():
            if start is None or mapping.addr < start:
                start = mapping.addr

            # The end of a mapping is relative to its own address, not to the
            # lowest address seen so far.
            mapping_end = mapping.addr + mapping.size
            if end is None or mapping_end > end:
                end = mapping_end

        # Technically possible, but more likely indicates that we messed up
        # somewhere along the line when interpreting mapping information.
        assert start is not None and end is not None, "No dyld shared cache mappings?"
        assert end >= start

        return end - start

    @property
    def slide(self) -> int:
        "The slide value of the DyLD Shared Cache, in bytes."
        mapping_ptr = self.base + self._header_size()
        mapping_base = pwndbg.aglib.memory.u64(mapping_ptr)

        # Make sure this is the start of the shared cache.
        #
        # Again, technically possible, but this breaks compatibility in a way
        # that we have no idea how to deal with. Better to fail and figure out
        # we're doing something wrong than have to track a random bug back to
        # this point.
        mapping_fileoff = pwndbg.aglib.memory.u64(mapping_ptr + 0x10)
        assert (
            mapping_fileoff == 0
        ), "First mapping of the shared cache is not at the start of the shared cache"

        slide = self.base - mapping_base
        assert slide >= 0, "Slide value is negative, but we don't expect it to be"

        return slide

    @property
    def image_index_trie(self) -> Trie[int] | None:
        """
        The trie of image indices, if available.
        """
        if self._header_size() <= 0x110:
            return None

        # The header stores the unslid address of the trie at +0x108.
        trie_unslid = pwndbg.aglib.memory.u64(self.addr + 0x108)
        trie_ptr = trie_unslid + self.slide

        return Trie(trie_ptr, _uleb128_ty)

    def image_base(self, index: int) -> int:
        """
        The base address of the image at the given index of the image array.
        """
        assert self.image_count > index

        return pwndbg.aglib.memory.u64(self._images_base + index * 0x20)

    def image_name(self, index: int):
        """
        The name of the image at the given index of the image array.
        """
        assert self.image_count > index

        # Each 0x20-byte entry stores the offset of its name at +0x18, relative
        # to the start of the shared cache.
        return pwndbg.aglib.memory.string(
            self.addr + pwndbg.aglib.memory.u32(self._images_base + index * 0x20 + 0x18)
        )

    @property
    def images(self) -> Generator[Tuple[bytes, int]]:
        """
        Generate a `(name, base address)` tuple for every image, in the order
        they appear in the image array.
        """
        # This is a little convoluted, but this function is quite hot and
        # calling the debugger can be quite slow, so pulling in the whole array
        # at once goes a really long way.
        #
        # Yes, even with the extra logic. Python is slow, but it's not as
        # slow as calling LLDB an extra time on every iteration.
        data = pwndbg.aglib.memory.read(self._images_base, 0x20 * self.image_count)

        for i in range(self.image_count):
            base = i * 0x20
            yield (
                pwndbg.aglib.memory.string(
                    self.addr + struct.unpack("<I", data[base + 0x18 : base + 0x1C])[0]
                ),
                struct.unpack("<Q", data[base : base + 8])[0],
            )

    @property
    def images_sorted(self) -> Generator[Tuple[bytes, int]]:
        "Same as images, but guaranteed to be sorted by increasing base address"
        if self._images_sorted_by_address:
            # The images are naturally sorted by increasing base address.
            #
            # This should be true the _vast_ majority of the time, and perhaps
            # even all the time. Just connect the generators.
            yield from self.images
        else:
            # The images are sorted in some other order.
            #
            # This should be very rare, but we shouldn't fail if it happens.
            # Unlike the other cases in which we have to choose whether to fail
            # at or gracefully handle a weird condition, libmacho doesn't seem
            # to rely on this being the case.
            yield from sorted(self.images, key=lambda image: image[1])

    def is_address_in_shared_cache(self, addr: int) -> bool:
        """
        Whether the given address is in the shared cache.
        """
        return addr >= self.base and addr < self.base + self.size

    def objc_builtin_selectors(self) -> DyldSharedCacheHashSet:
        """
        Looks up the hash table of builtin Objective-C selectors and returns it.

        Raises NotImplementedError for shared caches whose header is too small
        to have an `objcOptsOffset` value.
        """
        if self._header_size() <= 0x1D8:
            raise NotImplementedError(
                "Objective-C optimization queries are not yet supported for shared caches that have no objcOptsOffset value"
            )

        # Use `objcOptsOffset` and the new Objective-C optimizations header
        # to find the address of the symbol hash set.
        objc_opt_offset = pwndbg.aglib.memory.u64(self.addr + 0x1D0)
        objc_opt_ptr = self.addr + objc_opt_offset

        offset = pwndbg.aglib.memory.u64(objc_opt_ptr + 0x18)
        ptr = self.addr + offset

        # Technically possible, but we have *no* idea what to do if this
        # happens, and it's more likely that we got something wrong.
        assert (
            offset != 0
        ), "Tried to query builtin selector identity, but have no Objective-C optimization header?"

        return DyldSharedCacheHashSet(ptr)
|
||||
|
||||
|
||||
@pwndbg.lib.cache.cache_until("exit")
def shared_cache() -> DyldSharedCache | None:
    """
    Handle to the Darwin shared cache of the selected inferior.

    Returns None when the inferior reports a shared cache base address of zero.

    In Darwin, the way the Objective-C Runtime queries for the base address is
    to call `_dyld_get_shared_cache_range` from libdyld[1], which then calls a
    routine that lives inside dyld itself, and that returns the values after
    poking into internal C++ structures.

    From our perspective, that kind of sucks. Calling routines from debuggers
    can be quite unreliable, and so ideally we'd always be peeking into the data
    structures directly. But, in this case, even for Apple these are considered
    entirely private to dyld[2], and so there's even less of a stability
    guarantee for the layout of these structures than normal.

    Because of this, a level of care must be taken before calling this function,
    as it must be assumed that the state of the inferior can be changed by it.

    [1]: https://github.com/apple-oss-distributions/objc4/blob/f126469408dc82bd3f327217ae678fd0e6e3b37c/runtime/objc-opt.mm#L434
    [2]: https://github.com/apple-oss-distributions/dyld/blob/main/doc/dyld4.md#libdylddylib
    """
    inferior = pwndbg.dbg.selected_inferior()

    # The debugger evaluates this expression in the context of the inferior.
    result = inferior.evaluate_expression("(const void*)_dyld_get_shared_cache_range()")
    address = int(result)

    return DyldSharedCache(address) if address != 0 else None
|
||||
@ -0,0 +1,906 @@
|
||||
"""
|
||||
Apple Objective-C Runtime Support
|
||||
|
||||
This module implements support for analyzing the Apple Objective-C runtime. As
|
||||
expected, Apple provides no official specification for the internal ABI of ObjC
|
||||
and no guarantees of its stability, and so this module is not guaranteed to
|
||||
work on all versions of Darwin.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Callable
|
||||
from typing import Generator
|
||||
from typing import Generic
|
||||
from typing import TypeVar
|
||||
|
||||
from typing_extensions import override
|
||||
|
||||
import pwndbg
|
||||
import pwndbg.aglib.arch
|
||||
import pwndbg.aglib.macho
|
||||
import pwndbg.aglib.memory
|
||||
import pwndbg.aglib.symbol
|
||||
import pwndbg.aglib.typeinfo
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class _IdRaw:
    """
    Pointer to an Objective-C object in the heap.
    """

    def __init__(self, ptr: int):
        # Address of the object, exactly as given by the caller.
        self.addr = ptr
|
||||
|
||||
|
||||
class _IdTagged:
    """
    Tagged pointer to an Objective-C object.

    This is a bit of a misnomer, as tagged pointers may not be pointers at all,
    and the data for the entire object may be contained in the payload, with no
    backing allocation in the heap. It is up to the class to determine how to
    decode the payload properly.
    """

    def __init__(self, tag: int, payload: int, extended: bool):
        self.tag, self.payload, self.extended = tag, payload, extended

    def lookup_class(self) -> Class:
        """
        Looks up the class object matching the tag in this pointer.
        """
        slot = _tagged_pointer_classes()

        # Extended tags are biased by 256 relative to their table index.
        slot += (self.tag - 256) if self.extended else self.tag

        class_ptr = pwndbg.aglib.memory.read_pointer_width(int(slot.address))

        return Class(_ptrauth_strip(class_ptr))
|
||||
|
||||
|
||||
class _IsaPtr:
    """
    Pointer to an `isa_t` structure.
    """

    ISA_MASK = 0x0000000FFFFFFFF8
    "Mask of bits containing just the authenticated class pointer."

    def __init__(self, addr: int):
        # Address of the `isa_t` structure in the inferior.
        self._addr = addr

    def _read(self) -> int:
        """
        Read the bits of the `isa_t` structure into an integer.
        """
        return pwndbg.aglib.memory.read_pointer_width(self._addr)

    def get_class(self) -> Class:
        """
        Resolve the class this `isa_t` refers to.

        Keeps only the `ISA_MASK` bits of the structure and strips pointer
        authentication from the result.
        """
        class_bits = self._read() & _IsaPtr.ISA_MASK

        return Class(_ptrauth_strip(class_bits))
|
||||
|
||||
|
||||
def _isa_class_mask() -> int:
    """
    Read the pointer-width value stored at the `objc_debug_isa_class_mask`
    symbol of the inferior.
    """
    symbol_addr = pwndbg.aglib.symbol.lookup_symbol_addr("objc_debug_isa_class_mask")
    return pwndbg.aglib.memory.read_pointer_width(symbol_addr)
|
||||
|
||||
|
||||
class _ClassRoPtr:
    """
    Pointer to the read-only data of a class (presumably libobjc's `class_ro_t`;
    the field offsets used here are fixed by this code).
    """

    RO_META = 0x1
    RO_ROOT = 0x2
    RO_HAS_CXX_STRUCTORS = 0x4
    RO_HIDDEN = 0x10
    RO_EXCEPTION = 0x20
    RO_HAS_SWIFT_INITIALIZER = 0x40
    RO_IS_ARC = 0x80
    RO_HAS_CXX_DTOR_ONLY = 0x100
    RO_HAS_WEAK_WITHOUT_ARC = 0x200
    RO_FORBIDS_ASSOCIATED_OBJECTS = 0x400
    RO_FROM_BUNDLE = 0x20000000
    RO_FUTURE = 0x40000000
    RO_REALIZED = 0x80000000

    def __init__(self, addr: int):
        self._ptr = addr

    @staticmethod
    def _flatten(ty, ptr: int):
        """
        Yield every entry reachable from the list pointer `ptr`.

        A pointer with bit 0 clear points directly at a list of type `ty`. A
        pointer with bit 0 set points, once the bit is cleared, at a relative
        list of lists whose elements are lists of type `ty`.
        """
        is_list_of_lists = (ptr & 1) != 0
        addr = ptr & ~1

        if addr == 0:
            # Nothing to list. Not expected for a list of lists, but better
            # safe than sorry.
            return

        if not is_list_of_lists:
            yield from ty(ptr).entries()
            return

        for entry in _RelativeListOfLists(ty, addr).entries():
            # Entries are None when they must be ignored.
            if entry is not None:
                yield from entry.get_list().entries()

    def name(self) -> bytes:
        """
        Read the string pointed to by the pointer at +24.
        """
        name_ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 24)
        return pwndbg.aglib.memory.string(name_ptr)

    def flags(self) -> int:
        """
        Read the 32-bit flags word at the start of the structure.
        """
        return pwndbg.aglib.memory.u32(self._ptr)

    def methods(self) -> Generator[Method]:
        """
        Yield every method reachable from the list pointer at +32.
        """
        list_ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 32)
        yield from self._flatten(_MethodList, list_ptr)

    def ivars(self) -> Generator[InstanceVariable]:
        """
        Yield every instance variable in the list pointed to by +0x30, if any.
        """
        list_ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 0x30)
        if list_ptr == 0:
            return

        yield from _IVarList(list_ptr).entries()

    def properties(self) -> Generator[ClassProperty]:
        """
        Yield every property reachable from the list pointer at +0x40.
        """
        list_ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr + 0x40)
        yield from self._flatten(_ClassPropertyList, list_ptr)
|
||||
|
||||
|
||||
class _ClassRwExtPtr:
    """
    Pointer to the extended read-write data of a class (presumably libobjc's
    `class_rw_ext_t`). Fields are addressed in pointer-sized slots.
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    def _slot(self, index: int) -> int:
        """
        Address of the pointer-sized slot number `index` of the structure.
        """
        return self._ptr + index * pwndbg.aglib.typeinfo.ptrsize

    def ro(self) -> _ClassRoPtr:
        """
        The read-only class data, from the authenticated pointer in slot 0.
        """
        signed = pwndbg.aglib.memory.read_pointer_width(self._ptr)
        return _ClassRoPtr(_ptrauth_strip(signed))

    def methods(self) -> _ListArray[Method]:
        """
        The array of method lists stored in slot 1.
        """
        return _ListArray(_MethodList, self._slot(1))

    def properties(self) -> _ListArray[ClassProperty]:
        """
        The array of property lists stored in slot 2.
        """
        return _ListArray(_ClassPropertyList, self._slot(2))

    def demangled_name(self) -> bytes | None:
        """
        The string pointed to by slot 4, or None when that pointer is null.
        """
        name_ptr = pwndbg.aglib.memory.read_pointer_width(self._slot(4))
        return pwndbg.aglib.memory.string(name_ptr) if name_ptr != 0 else None

    def version(self) -> int:
        """
        The 32-bit value stored at the start of slot 5.
        """
        return pwndbg.aglib.memory.u32(self._slot(5))
|
||||
|
||||
|
||||
class _ClassRwPtr:
    """
    Pointer to the read-write data of a class (presumably libobjc's
    `class_rw_t`).
    """

    RW_REALIZED = 1 << 31

    def __init__(self, ptr: int):
        self._ptr = ptr

    def ro_or_rw_ext(self) -> _ClassRoPtr | _ClassRwExtPtr:
        """
        Resolve the tagged pointer stored at +8.

        Bit 0 selects the type of the target: when set, the pointer refers to
        the extended read-write data, otherwise to the read-only data.
        """
        raw = pwndbg.aglib.memory.read_pointer_width(self._ptr + 8)

        # Drop the tag bit, then strip pointer authentication, the same way
        # `_ClassRwExtPtr.ro` does for the pointer it reads. Without this, a
        # signed pointer would be used as an address as-is.
        target = _ptrauth_strip(raw & ~1)

        if raw & 1 == 1:
            return _ClassRwExtPtr(target)

        return _ClassRoPtr(target)
|
||||
|
||||
|
||||
class _ClassDataBitsPtr:
    """
    Pointer to a `class_data_bits_t` structure.
    """

    FAST_IS_RW_POINTER = 0x8000000000000000
    FAST_IS_SWIFT_LEGACY = 0x1
    FAST_IS_SWIFT_STABLE = 0x2
    FAST_HAS_DEFAULT_RR = 0x4

    FAST_DATA_MASK = 0x0F007FFFFFFFFFF8

    def __init__(self, ptr: int):
        self._ptr = ptr

    def data(self) -> _ClassRoPtr | _ClassRwPtr:
        """
        The class data these bits refer to, wrapped as read-write or read-only
        data depending on `_is_rw`.
        """
        wrapper = _ClassRwPtr if self._is_rw() else _ClassRoPtr
        return wrapper(self._data_addr())

    def _is_rw(self) -> bool:
        """
        Whether the data is read-write: either the bits say so directly, or the
        flags word of the data has the realized bit set.
        """
        if (self._ptr & _ClassDataBitsPtr.FAST_IS_RW_POINTER) != 0:
            # No need to touch memory when the pointer itself is marked.
            return True

        return (self._flags() & _ClassRwPtr.RW_REALIZED) != 0

    def _data_addr(self) -> int:
        """
        Address of the class data: the bits with pointer authentication stripped
        and everything outside of `FAST_DATA_MASK` cleared.
        """
        stripped = _ptrauth_strip(self._ptr)
        return stripped & _ClassDataBitsPtr.FAST_DATA_MASK

    def _flags(self) -> int:
        """
        Read the 32-bit flags word at the start of the class data.
        """
        return pwndbg.aglib.memory.u32(self._data_addr())
|
||||
|
||||
|
||||
class _EntList(Generic[T]):
    """
    Entity list.

    The list starts with a 32-bit `entsizeAndFlags` word, followed by a 32-bit
    entry count, followed by the entries themselves at +8.
    """

    _flags_mask: int = 0
    "Mask for the flag bits of `entsizeAndFlags`"

    def __init__(self, ptr: int):
        # `_addr` is the address of the list with any metadata stripped, while
        # `_ptr` keeps the pointer exactly as it was given.
        self._addr = self._addr_from_ptr(ptr)
        self._ptr = ptr

    def _entsize_and_flags(self) -> int:
        return pwndbg.aglib.memory.u32(self._addr)

    def _entries(self) -> int:
        return pwndbg.aglib.memory.u32(self._addr + 4)

    def flags(self) -> int:
        """
        The flag bits of `entsizeAndFlags`.
        """
        return self._entsize_and_flags() & self._flags_mask

    def entsize(self) -> int:
        """
        The size of a single entry, in bytes.
        """
        return self._entsize_and_flags() & ~self._flags_mask

    def _modify_pointer(self, ptr: int) -> int:
        """
        Hook to adjust the address of an entry before it is wrapped.

        Does nothing by default.
        """
        return ptr

    def _from_ptr(self, ptr: int) -> T:
        """
        Build the type of this list from a pointer.

        Must be implemented by the specialized class.
        """
        raise NotImplementedError()

    def _addr_from_ptr(self, ptr: int) -> int:
        """
        Strip any metadata from the pointer to this list.

        Must be implemented by the specialized class.
        """
        raise NotImplementedError()

    def __len__(self) -> int:
        return self._entries()

    def get(self, i: int) -> T:
        """
        Get the entry at index `i`.

        Raises IndexError when `i` is negative or past the end of the list.
        """
        count = len(self)

        # Negative indices must be rejected too: they would yield an address
        # that is before the first entry of the list.
        if i < 0 or i >= count:
            raise IndexError(f"Index {i} is out-of-range for entlist with {count} entries")

        return self._from_ptr(self._modify_pointer(self._addr + 8 + i * self.entsize()))

    def entries(self) -> Generator[T]:
        """
        Yield every entry of the list, in order.
        """
        for i in range(len(self)):
            yield self.get(i)
|
||||
|
||||
|
||||
class _RelativeListOfListsEntry(Generic[T]):
    """
    A single entry in a relative list of lists.

    The 64-bit word at the entry address packs an image index in its low 16
    bits, and a signed offset to the list in the remaining upper bits. The
    offset is relative to the address of the entry itself.
    """

    def __init__(self, ty: Callable[[int], _EntList[T]], ptr: int):
        self._ty = ty
        self._ptr = ptr

    def image_index(self) -> int:
        """
        The image index, stored in the low 16 bits of the entry.
        """
        word = pwndbg.aglib.memory.u64(self._ptr)
        return word & 0xFFFF

    def _list_offset(self) -> int:
        """
        The signed offset from this entry to its list, stored in the bits above
        the image index.
        """
        word = pwndbg.aglib.memory.s64(self._ptr)
        return word >> 16

    def get_list(self) -> _EntList[T]:
        """
        Build the list this entry points to.
        """
        list_ptr = self._ptr + self._list_offset()
        return self._ty(list_ptr)
|
||||
|
||||
|
||||
class _RelativeListOfLists(
    _EntList[_RelativeListOfListsEntry[T] | None],
    Generic[T],
):
    """
    An array of relative pointers to lists.

    Entries whose image has not been marked as loaded are produced as `None`.

    This corresponds to the `relative_list_list_t` type in libobjc.
    """

    def __init__(self, ty: Callable[[int], _EntList[T]], ptr: int):
        super().__init__(ptr)
        self._ty = ty

    @override
    def _from_ptr(self, ptr: int) -> _RelativeListOfListsEntry[T] | None:
        candidate = _RelativeListOfListsEntry(self._ty, ptr)

        # The entry is only valid if its corresponding image has been marked
        # as loaded in `objc_debug_headerInfoRWs`.
        if _header_info_rw_is_image_loaded(candidate.image_index()):
            return candidate

        return None

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        # Top-Byte-Ignore is assumed for method lists, but method list pointers
        # may have metadata attached to them, so the top byte is cleared.
        return ptr & ~(0xFF << 56)
|
||||
|
||||
|
||||
class _ListArray(Generic[T]):
    """
    A runtime-polymorphic array type for lists. May be a pointer to a list type,
    an array of pointers, or a _RelativeListOfLists, distinguished by a tag in
    a pointer.

    Strangely for Apple, the tagged pointer to the final list is contained
    inside the list array structure, rather than having the whole structure be
    inlined into a pointer value. Suspiciously sane.

    This corresponds to the `list_array_tt` type in libobjc.
    """

    def __init__(self, ty: Callable[[int], _EntList[T]], ptr: int):
        self._ptr = ptr
        self._ty = ty

    def entries(self) -> Generator[T]:
        """
        Yield the entries of every list reachable from this list array.

        The pointer stored at the address of this structure carries a tag in
        its two least significant bits, which selects how the remaining bits
        are interpreted. Nothing is yielded for a null pointer, or for a tag
        value that is not handled here.
        """
        raw_ptr = pwndbg.aglib.memory.read_pointer_width(self._ptr)

        tag = raw_ptr & 3
        ptr = raw_ptr & ~3

        if ptr == 0:
            return

        if tag == 0:
            # This is just a pointer to the list.
            yield from self._ty(ptr).entries()
        elif tag == 1:
            # This is an array of lists: a 32-bit count, with the pointers to
            # the lists starting 8 bytes into the structure.
            count = pwndbg.aglib.memory.u32(ptr)
            ptrsize = pwndbg.aglib.typeinfo.ptrsize
            for i in range(count):
                yield from self._ty(
                    pwndbg.aglib.memory.read_pointer_width(ptr + 8 + i * ptrsize)
                ).entries()
        elif tag == 2:
            # This is a relative list of lists.
            for ll in _RelativeListOfLists(self._ty, ptr).entries():
                if ll is None:
                    # Entries that belong to images that have not been loaded
                    # are reported as `None`, and have no list to walk.
                    continue
                yield from ll.get_list().entries()
|
||||
|
||||
|
||||
def _header_info_rw_is_image_loaded(index: int) -> bool:
    """
    Check whether the image with the given index is marked as loaded in
    `objc_debug_headerInfoRWs`.

    The structure starts with a 32-bit entry count, followed by a 32-bit entry
    size, with the entries themselves starting 8 bytes in. The least
    significant bit of an entry is set when its image is loaded.

    Raises `IndexError` if `index` is not smaller than the entry count.
    """
    symbol_addr = pwndbg.aglib.symbol.lookup_symbol_addr("objc_debug_headerInfoRWs")
    table = pwndbg.aglib.memory.read_pointer_width(symbol_addr)

    count = pwndbg.aglib.memory.u32(table)
    entsize = pwndbg.aglib.memory.u32(table + 4)

    if index >= count:
        raise IndexError(
            f"Image index {index} is out-of-bounds for headerInfoRWs structure with {count} entries"
        )

    entry = pwndbg.aglib.memory.read_pointer_width(table + 8 + entsize * index)
    return (entry & 1) != 0
|
||||
|
||||
|
||||
def _tagged_pointer_classes() -> pwndbg.dbg_mod.Value:
    """
    The Objective-C runtime tagged pointer class list.

    The classes that the tag values in a tagged pointer correspond to are not
    fixed. They are instead kept in a runtime-global array, which gets looked
    up when a message is sent.

    Returns the `objc_debug_taggedpointer_classes` symbol, cast to a pointer to
    a `void` pointer.
    """
    classes = pwndbg.aglib.symbol.lookup_symbol("objc_debug_taggedpointer_classes")
    void_ptr_ptr = pwndbg.aglib.typeinfo.void.pointer().pointer()
    return classes.cast(void_ptr_ptr)
|
||||
|
||||
|
||||
def _ptr_obfuscation_value() -> int:
    """
    The value the Objective-C runtime uses to obfuscate tagged pointers.

    Read as a pointer-width value from the address of the
    `objc_debug_taggedpointer_obfuscator` symbol.
    """
    obfuscator_addr = pwndbg.aglib.symbol.lookup_symbol_addr(
        "objc_debug_taggedpointer_obfuscator"
    )
    return pwndbg.aglib.memory.read_pointer_width(obfuscator_addr)
|
||||
|
||||
|
||||
def _try_decode_tagged_split(ptr: int) -> _IdTagged | None:
|
||||
"""
|
||||
Decodes a tagged pointer encoded in the split-tag scheme, if it is tagged.
|
||||
|
||||
This is the encoding scheme used in modern - iOS 14 and newer - ARM64
|
||||
platforms.
|
||||
|
||||
If the pointer is not tagged, returns `None`.
|
||||
"""
|
||||
if ptr & 0x8000000000000000 == 0:
|
||||
# Not a tagged pointer.
|
||||
return None
|
||||
|
||||
if ptr & 7 == 7:
|
||||
# This is an extended tag with a 52-bit payload.
|
||||
tag = (ptr >> 55) & 0xFF
|
||||
payload = (ptr >> 3) & 0xFFFFFFFFFFFFF
|
||||
extended = True
|
||||
else:
|
||||
# This is a short tag with a 60-bit payload.
|
||||
tag = ptr & 7
|
||||
payload = (ptr >> 3) & 0xFFFFFFFFFFFFFFF
|
||||
extended = False
|
||||
|
||||
return _IdTagged(tag, payload, extended)
|
||||
|
||||
|
||||
def _try_decode_tagged_lsb(ptr: int) -> _IdTagged:
|
||||
"""
|
||||
Decodes a tagged pointer encoded in the LSB-tag scheme, if it is tagged.
|
||||
|
||||
This is the encoding scheme used in all x86-64 versions of Darwin.
|
||||
|
||||
If the pointer is not tagged, returns `None`.
|
||||
"""
|
||||
if ptr & 1 == 0:
|
||||
# Not a tagged pointer.
|
||||
return None
|
||||
|
||||
if ptr & 14 == 14:
|
||||
# This is an extended tag with a 52-bit payload.
|
||||
tag = (ptr >> 4) & 0xFF
|
||||
payload = ptr >> 12
|
||||
extended = True
|
||||
else:
|
||||
# This is a short tage with a 60-bit payload.
|
||||
tag = (ptr >> 1) & 7
|
||||
payload = ptr >> 4
|
||||
extended = False
|
||||
|
||||
return _IdTagged(tag, payload, extended)
|
||||
|
||||
|
||||
def _decode_prog_id(ptr: int) -> _IdRaw | _IdTagged:
    """
    Decode an object pointer value, as seen in the program.

    The tagged pointer scheme is picked based on the current architecture. If
    the value does not decode as a tagged pointer, it is treated as a direct
    pointer.

    Raises `AssertionError` for any architecture other than `aarch64` and
    `x86-64`.
    """
    arch = pwndbg.aglib.arch.name

    # First, check for tagged pointers.
    if arch == "aarch64":
        tagged = _try_decode_tagged_split(ptr)
    elif arch == "x86-64":
        tagged = _try_decode_tagged_lsb(ptr)
    else:
        raise AssertionError(f"Unexpected Objective-C architecture: {arch}")

    # Anything that did not decode as a tagged pointer is a direct pointer.
    return _IdRaw(ptr) if tagged is None else tagged
|
||||
|
||||
|
||||
def _ptrauth_strip(ptr: int) -> int:
|
||||
"""
|
||||
Strip pointer signing information from a given signed pointer.
|
||||
"""
|
||||
return ptr & 0xFFFFFFFFFFFF
|
||||
|
||||
|
||||
class Object:
    """
    An Objective-C object, identified by its pointer value in the program.
    """

    _addr: int
    "Object pointer value, as seen in the program. May be tagged, obfuscated, authenticated."

    _id: _IdRaw | _IdTagged
    "Decoded object pointer value. May be tagged."

    def __init__(self, addr: int):
        self._addr = addr
        self._id = _decode_prog_id(addr)

    @property
    def cls(self) -> Class | None:
        """
        The class of this object.

        Direct pointers are resolved through their isa pointer, while tagged
        pointers look their class up from the tag.
        """
        ident = self._id

        if isinstance(ident, _IdRaw):
            return _IsaPtr(ident.addr).get_class()

        if isinstance(ident, _IdTagged):
            return ident.lookup_class()

        return None
|
||||
|
||||
|
||||
class Class(Object):
    """
    An Objective-C class.

    Classes are objects themselves, and their pointers are never tagged.
    """

    def __init__(self, addr: int):
        super().__init__(addr)
        assert isinstance(self._id, _IdRaw), "Class pointers are never tagged"

    def _data_bits(self) -> _ClassDataBitsPtr:
        """
        The class data bits pointer, read from 32 bytes into the class
        structure, with its signature stripped.
        """
        # MyPy fails if we don't check this a second time.
        assert isinstance(self._id, _IdRaw), "Class pointers are never tagged"

        raw = pwndbg.aglib.memory.read_pointer_width(self._id.addr + 32)
        return _ClassDataBitsPtr(_ptrauth_strip(raw))

    def _ro(self) -> _ClassRoPtr:
        """
        The read-only data of this class.

        The data bits may point to the read-only data directly, or to the
        read-write data, which in turn leads to the read-only data either
        directly or through its extension.
        """
        data = self._data_bits().data()

        if isinstance(data, _ClassRoPtr):
            return data

        if isinstance(data, _ClassRwPtr):
            inner = data.ro_or_rw_ext()
            if isinstance(inner, _ClassRwExtPtr):
                return inner.ro()
            if isinstance(inner, _ClassRoPtr):
                return inner

        # FIXME: Should be `typing.assert_never`, needs Python 3.11
        assert False

    def _rw_ext(self) -> _ClassRwExtPtr | None:
        """
        The read-write extension data of this class, or `None` if the class
        does not have one.
        """
        data = self._data_bits().data()

        if isinstance(data, _ClassRoPtr):
            return None

        if isinstance(data, _ClassRwPtr):
            inner = data.ro_or_rw_ext()
            if isinstance(inner, _ClassRwExtPtr):
                return inner
            if isinstance(inner, _ClassRoPtr):
                return None

        # FIXME: Should be `typing.assert_never`, needs Python 3.11
        assert False

    @property
    def superclass(self) -> Class | None:
        """
        The superclass of this class, or `None` if this is a root class.
        """
        # MyPy fails if we don't check this a second time.
        assert isinstance(self._id, _IdRaw), "Class pointers are never tagged"

        is_root = self._ro().flags() & _ClassRoPtr.RO_ROOT != 0
        if is_root:
            # This is a root class, and thus has no superclass.
            return None

        # The superclass pointer is the second pointer-sized field of the
        # class structure.
        field_addr = self._id.addr + pwndbg.aglib.typeinfo.ptrsize
        signed = pwndbg.aglib.memory.read_pointer_width(field_addr)

        return Class(_ptrauth_strip(signed))

    @property
    def name(self) -> bytes:
        """
        The name of this class.
        """
        return self._ro().name()

    @property
    def methods(self) -> Generator[Method]:
        """
        The methods of this class.
        """
        rw_ext = self._rw_ext()
        if rw_ext is None:
            # Return the base methods.
            yield from self._ro().methods()
        else:
            # Return the methods added to the class at runtime from the Class
            # R/W structure, which also include the base methods.
            yield from rw_ext.methods().entries()

    @property
    def ivars(self) -> Generator[InstanceVariable]:
        """
        The instance variables of this class.
        """
        yield from self._ro().ivars()

    @property
    def properties(self) -> Generator[ClassProperty]:
        """
        The properties of this class.
        """
        rw_ext = self._rw_ext()
        if rw_ext is None:
            # Return the base properties.
            yield from self._ro().properties()
        else:
            # Return the properties added to the class at runtime from the
            # Class R/W structure, which also include the base properties.
            yield from rw_ext.properties().entries()

    @property
    def is_metaclass(self) -> bool:
        """
        Whether the read-only flags of this class mark it as a metaclass.
        """
        meta_bits = self._ro().flags() & _ClassRoPtr.RO_META
        return meta_bits != 0

    @override
    @property
    def cls(self) -> Class | None:
        # Following this pointer in metaclasses is weird. Users are better
        # served following the superclass chain, instead.
        return None if self.is_metaclass else super().cls
|
||||
|
||||
|
||||
class InstanceVariable:
    """
    An Objective-C Instance Variable.

    Instance Variables are NOT objects!

    The structure holds three pointer-sized fields - a pointer to the offset,
    a pointer to the name and a pointer to the type name - followed by two
    32-bit fields, for the alignment and the size.
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def offset(self) -> int:
        """
        The offset in bytes of this value from the start of the object instance.
        """
        # The first field is a pointer to the offset, not the offset itself.
        offset_addr = pwndbg.aglib.memory.read_pointer_width(self._ptr)
        return pwndbg.aglib.memory.s32(offset_addr)

    @property
    def name(self) -> bytes:
        """
        The name of this instance variable.
        """
        field_addr = self._ptr + pwndbg.aglib.typeinfo.ptrsize
        return pwndbg.aglib.memory.string(pwndbg.aglib.memory.read_pointer_width(field_addr))

    @property
    def typename(self) -> bytes:
        """
        The name of the type of this instance variable.
        """
        field_addr = self._ptr + pwndbg.aglib.typeinfo.ptrsize * 2
        return pwndbg.aglib.memory.string(pwndbg.aglib.memory.read_pointer_width(field_addr))

    @property
    def alignment(self) -> int:
        """
        The alignment of this instance variable, in bytes.
        """
        # The alignment is stored as a base-2 logarithm.
        raw = pwndbg.aglib.memory.u32(self._ptr + pwndbg.aglib.typeinfo.ptrsize * 3)

        # All ones indicates the natural alignment of a pointer.
        return pwndbg.aglib.typeinfo.ptrsize if raw == 0xFFFFFFFF else 1 << raw

    @property
    def size(self) -> int:
        """
        The size of this instance variable, in bytes.
        """
        field_addr = self._ptr + pwndbg.aglib.typeinfo.ptrsize * 3 + 4
        return pwndbg.aglib.memory.u32(field_addr)
|
||||
|
||||
|
||||
class ClassProperty:
    """
    An Objective-C class property.

    The structure holds two pointers to strings: the name of the property,
    followed by its value.
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def name(self) -> bytes:
        """
        The name of this class property.
        """
        name_addr = pwndbg.aglib.memory.read_pointer_width(self._ptr)
        return pwndbg.aglib.memory.string(name_addr)

    @property
    def value(self) -> bytes:
        """
        The value of this property.
        """
        # NOTE(review): in libobjc's `property_t`, the second field appears to
        # be the attribute string of the property - confirm.
        field_addr = self._ptr + pwndbg.aglib.typeinfo.ptrsize
        return pwndbg.aglib.memory.string(pwndbg.aglib.memory.read_pointer_width(field_addr))
|
||||
|
||||
|
||||
class Selector:
    """
    An Objective-C Selector.

    Selectors are NOT objects!
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def name(self) -> bytes:
        """
        The human-readable name of this selector.
        """

        # Selectors in the Apple Objective-C runtime are human-readable strings
        # with unique identities[1]. The identity of a selector is the pointer
        # to its string, which the tooling guarantees to be unique, so reading
        # the name is just a matter of following the identity pointer.
        #
        # [1]: https://web.archive.org/web/20161010081824/http://unixjunkie.blogspot.com/2006/02/nil-and-nil.html
        name_addr = self._ptr
        return pwndbg.aglib.memory.string(name_addr)
|
||||
|
||||
|
||||
class Method:
    """
    An Objective-C Method Pointer.

    Methods are NOT objects!

    A method pointer can be one of three types: Small, small direct, and big.

    Pointer types are distinguished by the two least significant bits in the
    integer representation of the pointer. A value of `1` is used for both small
    pointer types, while all other values are used to distinguish between the
    signing nuances of big pointers.

    Small pointers are 32-bit wide and relative to a given base value. Big
    pointers contain the pointers themselves, and they may or may not be signed.

    Small direct pointers are small pointers that reside in the shared cache, and
    their selectors are relative to @selector(🤯), while the selectors of regular
    small pointers are relative to the pointers themselves.

    In both layouts, a method is made up of three consecutive fields: the
    selector, the types string, and the implementation. Fields are 4 bytes wide
    in small methods, and pointer-sized in big methods.
    """

    def __init__(self, ptr: int):
        self._ptr = ptr

    @property
    def sel(self) -> Selector:
        "The selector this method responds to."
        kind = self._ptr & 3
        base = self._ptr & ~3
        if kind == 1:
            if pwndbg.aglib.macho.shared_cache().is_address_in_shared_cache(base):
                # To resolve selectors of small method pointers in the shared cache,
                # we have to look up the identity of @selector(🤯).
                rel = (
                    pwndbg.aglib.macho.shared_cache()
                    .objc_builtin_selectors()
                    .lookup("🤯".encode("utf-8"))
                )
                ptr = rel + pwndbg.aglib.memory.s32(base)
            else:
                offset = pwndbg.aglib.memory.s32(base)
                ref = base + offset

                # Non-shared cache values are pointers to selectors.
                ptr = pwndbg.aglib.memory.read_pointer_width(ref)

            return Selector(ptr)
        else:
            # The selector is the first field of a big method.
            return Selector(_ptrauth_strip(pwndbg.aglib.memory.read_pointer_width(base)))

    @property
    def types(self) -> bytes:
        "The types of the arguments to this method."
        kind = self._ptr & 3
        base = self._ptr & ~3
        if kind == 1:
            # The second field of a small method is an offset relative to the
            # address of the field itself.
            ptr = base + 4
            offset = pwndbg.aglib.memory.s32(ptr)
            addr = ptr + offset
        else:
            # The types string is the second field of a big method.
            addr = _ptrauth_strip(
                pwndbg.aglib.memory.read_pointer_width(base + pwndbg.aglib.typeinfo.ptrsize)
            )

        return pwndbg.aglib.memory.string(addr)

    @property
    def imp(self) -> int:
        "The pointer to the function that implements this method."
        kind = self._ptr & 3
        base = self._ptr & ~3
        if kind == 1:
            # There's a bit of nuance here.
            #
            # Method swizzling for small pointers is implemented using a global
            # hash map of method pointers to implementation pointers. When
            # getting the IMP pointer for a small pointer, the runtime will
            # first check the global hash map to see if the method has been
            # swizzled, and return the swizzled method if it has. The runtime
            # will do what we do here if method has not been swizzled.
            #
            # Currently, we have no good way to query this map, and no other way
            # to detect that a method has been swizzled, so swizzles to small
            # pointers are unfortunately completely invisible to us.
            #
            # TODO: Handle method swizzles for small-pointer-type Objective-C methods.
            ptr = base + 8
            offset = pwndbg.aglib.memory.s32(ptr)
            return ptr + offset
        else:
            # The implementation is the third field of a big method, located
            # right after the selector and the types string. Reading the second
            # field here would return the pointer to the types string instead.
            return _ptrauth_strip(
                pwndbg.aglib.memory.read_pointer_width(
                    base + 2 * pwndbg.aglib.typeinfo.ptrsize
                )
            )
|
||||
|
||||
|
||||
class _MethodList(_EntList[Method]):
    """
    Method entity list.

    The pointers to the entries of this list are tagged in their two least
    significant bits, to tell `Method` which kind of method pointer they are.
    """

    _flags_mask = 0xFFFF0003

    SMALL_METHOD_LIST_FLAG = 0x80000000
    "Indicates that the pointers in this list are small method pointers."

    BIG_SIGNED_METHOD_LIST_FLAG = 0x8000000000000000
    """
    Indicates that the pointers in this list are big and signed.

    Stored as part of the pointer to the method list, rather than in the flags
    field, as is the case with other flags.
    """

    @override
    def _from_ptr(self, ptr: int) -> Method:
        return Method(ptr)

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        # Top-Byte-Ignore is assumed for method lists, but method list pointers
        # may have metadata attached to them, so the top byte is cleared.
        return ptr & ~(0xFF << 56)

    @override
    def _modify_pointer(self, ptr: int) -> int:
        untagged = ptr & ~3

        if self.flags() & self.SMALL_METHOD_LIST_FLAG:
            # This is a small pointer list.
            return untagged | 1

        if self._ptr & self.BIG_SIGNED_METHOD_LIST_FLAG:
            # This is a big signed pointer list.
            return untagged | 2

        # No tag or flag. This is a big pointer list.
        return untagged
|
||||
|
||||
|
||||
class _IVarList(_EntList[InstanceVariable]):
    """
    IVar entity list.

    Neither the pointer to the list nor the pointers to its entries carry any
    metadata, and `entsizeAndFlags` has no flag bits.
    """

    _flags_mask = 0

    @override
    def _from_ptr(self, ptr: int) -> InstanceVariable:
        return InstanceVariable(ptr)

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        # The pointer to the list is used as-is.
        return ptr

    @override
    def _modify_pointer(self, ptr: int) -> int:
        # The pointers to the entries are used as-is.
        return ptr
|
||||
|
||||
|
||||
class _ClassPropertyList(_EntList[ClassProperty]):
    """
    Class property entity list.

    Neither the pointer to the list nor the pointers to its entries carry any
    metadata, and `entsizeAndFlags` has no flag bits.
    """

    _flags_mask = 0

    @override
    def _from_ptr(self, ptr: int) -> ClassProperty:
        return ClassProperty(ptr)

    @override
    def _addr_from_ptr(self, ptr: int) -> int:
        # The pointer to the list is used as-is.
        return ptr

    @override
    def _modify_pointer(self, ptr: int) -> int:
        # The pointers to the entries are used as-is.
        return ptr
|
||||
Loading…
Reference in new issue