TLS related enhancements (#1536)

* Refactor TLS module

- Replace unreliable `__errno_location()` trick with `pthread_self()` to acquire TLS address
- Consolidate heap heuristics checks about TLS within the `pwndbg.gdblib.tls` module for better organization

* Bug fix for the `errno` command

Calling `__errno_location()` without locking the scheduler can cause another thread to inadvertently continue execution

* Refactor code about heap heuristics of thread-local variables

- Replace some checks with some functions in `pwndbg.gdblib.tls`
- Try to find tcache with `mp_.sbrk_base + 0x10` if the target is single-threaded

* Add tests for heap heuristics with multi-threaded

* Refactor scheduler-locking related functions

- Move these functions into `pwndbg.gdblib.scheduler`
- Fetch the parameter value once (https://github.com/pwndbg/pwndbg/pull/1536#discussion_r1082549746)

* Avoid bug caused by GLIBC_TUNABLES

See https://github.com/pwndbg/pwndbg/pull/1536#discussion_r1083202815

* Add note about `set scheduler-locking on`

* Add comment for `lock_scheduler`

Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com>

* Update DEVELOPING.md

Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com>
pull/1533/head
Alan Li 3 years ago committed by GitHub
parent 52a479211c
commit 2a5f563444
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -108,3 +108,4 @@ Feel free to update the list below!
* We would like to add proper tests for pwndbg - see tests framework PR if you want to help on that.
* If you want to use `gdb.parse_and_eval("a_function_name()")` or anything similar that calls a function in the debuggee, remember that this may let other threads continue execution unless `set scheduler-locking on` is in effect. If that is not what you expect, use `parse_and_eval_with_scheduler_lock` from `pwndbg.gdblib.scheduler` instead.

@ -10,6 +10,7 @@ import pwndbg.commands
import pwndbg.gdblib.regs
import pwndbg.gdblib.symbol
from pwndbg.commands import CommandCategory
from pwndbg.gdblib.scheduler import parse_and_eval_with_scheduler_lock
errno.errorcode[0] = "OK" # type: ignore # manually add error code 0 for "OK"
@ -44,7 +45,11 @@ def errno_(err) -> None:
if errno_loc_gotplt is None or pwndbg.gdblib.vmmap.find(
pwndbg.gdblib.memory.pvoid(errno_loc_gotplt)
):
err = int(gdb.parse_and_eval("*((int *(*) (void)) __errno_location) ()"))
err = int(
parse_and_eval_with_scheduler_lock(
"*((int *(*) (void)) __errno_location) ()"
)
)
else:
print(
"Could not determine error code automatically: the __errno_location@got.plt has no valid address yet (perhaps libc.so hasn't been loaded yet?)"

@ -45,7 +45,6 @@ def on_start() -> None:
@pwndbg.gdblib.events.exit
def on_exit() -> None:
pwndbg.gdblib.tls.reset()
pwndbg.gdblib.file.reset_remote_files()
pwndbg.gdblib.next.clear_temp_breaks()

@ -0,0 +1,30 @@
from contextlib import contextmanager
import gdb
@contextmanager
def lock_scheduler():
    """
    Run the body of a ``with`` statement with GDB's thread scheduler locked.

    While the scheduler is locked, other threads stay stopped during execution.
    This is useful to prevent bugs where e.g.: gdb.parse_and_eval("(int)foo()")
    would execute foo() on the current debugee thread but would also unlock other
    threads for being executed and those other threads may for example hit a
    breakpoint we set previously which would be confusing for the user.

    The current `scheduler-locking` value is read once on entry. If it is not
    already "on", it is switched to "on" and the previous value is restored on
    exit, including when the body raises, so a failed evaluation does not
    leave the scheduler locked behind the user's back.

    See also: https://sourceware.org/gdb/onlinedocs/gdb/All_002dStop-Mode.html
    """
    old_config = gdb.parameter("scheduler-locking")
    if old_config == "on":
        # Already locked by the user: nothing to change and nothing to restore.
        yield
        return

    gdb.execute("set scheduler-locking on")
    try:
        yield
    finally:
        # Runs on both normal exit and when the body raised (e.g. gdb.error).
        gdb.execute("set scheduler-locking %s" % old_config)
def parse_and_eval_with_scheduler_lock(expr: str) -> gdb.Value:
    """Evaluate `expr` via gdb.parse_and_eval() inside a lock_scheduler() block.

    The expression string is passed through to GDB unchanged and the
    resulting gdb.Value is returned after the context manager has exited.
    """
    with lock_scheduler():
        evaluated = gdb.parse_and_eval(expr)
    return evaluated

@ -12,76 +12,42 @@ import pwndbg.gdblib.memory
import pwndbg.gdblib.regs
import pwndbg.gdblib.symbol
import pwndbg.gdblib.vmmap
from pwndbg.gdblib.scheduler import parse_and_eval_with_scheduler_lock
class module(ModuleType):
"""Getting Thread Local Storage (TLS) information."""
_errno_offset = None
def is_thread_local_variable_offset(self, offset: int) -> bool:
    """Check if the offset to TLS is a valid offset for the heap heuristics.

    An offset is accepted when its distance from the TLS base is strictly
    between 0 and 0x250 and the offset is a multiple of the pointer size.
    """
    # On x86-64/i386 the accepted offsets are negative; on every other
    # architecture (treated like aarch64/arm) they are positive.
    if pwndbg.gdblib.arch.current in ("x86-64", "i386"):
        distance = -offset
    else:
        distance = offset

    if not 0 < distance < 0x250:
        return False

    # check alignment
    return offset % pwndbg.gdblib.arch.ptrsize == 0
def get_tls_base_via_errno_location(self) -> int:
"""Heuristically determine the base address of the TLS."""
if pwndbg.gdblib.symbol.address(
"__errno_location"
) is None or pwndbg.gdblib.arch.current not in (
"x86-64",
"i386",
"arm",
):
# Note: We doesn't implement this for aarch64 because its TPIDR_EL0 register seems always work
# If oneday we can't get TLS base via TPIDR_EL0, we should implement this for aarch64
return 0
already_lock = gdb.parameter("scheduler-locking") == "on"
old_config = gdb.parameter("scheduler-locking")
if not already_lock:
gdb.execute("set scheduler-locking on")
errno_addr = int(gdb.parse_and_eval("(int *)__errno_location()"))
if not already_lock:
gdb.execute("set scheduler-locking %s" % old_config)
def is_thread_local_variable(self, addr: int) -> bool:
    """Check if the address is a valid thread local variable's address for the heap heuristics.

    Returns True when the TLS base cannot be determined (the address is then
    trusted as-is). Otherwise the address is accepted only if its offset from
    the TLS base passes is_thread_local_variable_offset() and it lies in the
    same mapped page as the TLS base.
    """
    # Read the property once: resolving it may fall back to calling
    # pthread_self() in the inferior, which we do not want to repeat.
    tls_base = self.address
    if not tls_base:
        # Since we can not get the TLS base address, we trust that the address is valid.
        return True

    if not self.is_thread_local_variable_offset(addr - tls_base):
        return False

    # vmmap.find() may return None for an unmapped address; treat that as
    # "not a thread-local variable" instead of failing on `addr in None`.
    tls_page = pwndbg.gdblib.vmmap.find(tls_base)
    return tls_page is not None and addr in tls_page
if not self._errno_offset:
__errno_location_instr = pwndbg.disasm.near(
pwndbg.gdblib.symbol.address("__errno_location"), 5, show_prev_insns=False
)
if pwndbg.gdblib.arch.current == "x86-64":
for instr in __errno_location_instr:
# Find something like: mov rax, qword ptr [rip + disp]
if instr.mnemonic == "mov":
self._errno_offset = pwndbg.gdblib.memory.s64(instr.next + instr.disp)
break
elif pwndbg.gdblib.arch.current == "i386":
for instr in __errno_location_instr:
# Find something like: mov eax, dword ptr [eax + disp]
# (disp is a negative value)
if instr.mnemonic == "mov":
# base offset is from the first `add eax` after `call __x86.get_pc_thunk.bx`
base_offset_instr = next(
instr for instr in __errno_location_instr if instr.mnemonic == "add"
)
base_offset = base_offset_instr.address + base_offset_instr.operands[1].int
self._errno_offset = pwndbg.gdblib.memory.s32(base_offset + instr.disp)
break
elif pwndbg.gdblib.arch.current == "arm":
ldr_instr = None
for instr in __errno_location_instr:
if not ldr_instr and instr.mnemonic == "ldr":
ldr_instr = instr
elif ldr_instr and instr.mnemonic == "add":
offset = ldr_instr.operands[1].mem.disp
offset = pwndbg.gdblib.memory.s32((ldr_instr.address + 4 & -4) + offset)
self._errno_offset = pwndbg.gdblib.memory.s32(instr.address + 4 + offset)
break
if not self._errno_offset:
raise OSError("Can not find tls base")
return errno_addr - self._errno_offset
def call_pthread_self(self) -> int:
    """Get the address of TLS by calling pthread_self().

    Returns 0 when the `pthread_self` symbol cannot be resolved or when GDB
    reports an error while evaluating the call.
    """
    symbol_missing = pwndbg.gdblib.symbol.address("pthread_self") is None
    if symbol_missing:
        return 0

    try:
        # The call runs with the scheduler locked so other threads stay stopped.
        thread_pointer = parse_and_eval_with_scheduler_lock("(void *)pthread_self()")
        return int(thread_pointer)
    except gdb.error:
        return 0
@property
def address(self) -> int:
"""Get the base address of TLS."""
if pwndbg.gdblib.arch.current not in ("x86-64", "i386", "aarch64", "arm"):
# Not supported yet
return 0
tls_base = 0
if pwndbg.gdblib.arch.current == "x86-64":
@ -91,20 +57,12 @@ class module(ModuleType):
elif pwndbg.gdblib.arch.current == "aarch64":
tls_base = int(pwndbg.gdblib.regs.TPIDR_EL0)
# Sometimes, we need to get TLS base via errno location for the following reason:
# Sometimes, we need to get TLS base via pthread_self() for the following reason:
# For x86-64, fsbase might be 0 if we are remotely debugging and the GDB version <= 8.X
# For i386, gsbase might be 0 if we are remotely debugging
# For arm (32-bit), we doesn't have other choice
# For other archs, we can't get the TLS base address via register
# Note: aarch64 seems doesn't have this issue
is_valid_tls_base = (
pwndbg.gdblib.vmmap.find(tls_base) is not None
and tls_base % pwndbg.gdblib.arch.ptrsize == 0
)
return tls_base if is_valid_tls_base else self.get_tls_base_via_errno_location()
def reset(self) -> None:
# We should reset the offset when we attach to a new process
self._errno_offset = None
return tls_base if tls_base else self.call_pthread_self()
# To prevent garbage collection

@ -1136,7 +1136,7 @@ class DebugSymsHeap(GlibcMemoryAllocator):
thread's tcache.
"""
if self.has_tcache():
tcache = self.mp["sbrk_base"] + 0x10
tcache = self.get_sbrk_heap_region().vaddr + 0x10
if self.multithreaded:
tcache_addr = pwndbg.gdblib.memory.pvoid(
pwndbg.gdblib.symbol.static_linkage_symbol_address("tcache")
@ -1440,23 +1440,8 @@ class HeuristicHeap(GlibcMemoryAllocator):
if thread_arena_via_config > 0:
return Arena(thread_arena_via_config)
elif thread_arena_via_symbol:
if pwndbg.gdblib.symbol.static_linkage_symbol_address("thread_arena"):
# If the symbol is static-linkage symbol, we trust it.
return Arena(pwndbg.gdblib.memory.u(thread_arena_via_symbol))
# Check &thread_arena is nearby TLS base or not to avoid false positive.
tls_base = pwndbg.gdblib.tls.address
if tls_base:
if pwndbg.gdblib.arch.current in ("x86-64", "i386"):
is_valid_address = 0 < tls_base - thread_arena_via_symbol < 0x250
else: # elif pwndbg.gdblib.arch.current in ("aarch64", "arm"):
is_valid_address = 0 < thread_arena_via_symbol - tls_base < 0x250
is_valid_address = (
is_valid_address
and thread_arena_via_symbol in pwndbg.gdblib.vmmap.find(tls_base)
)
if is_valid_address:
if pwndbg.gdblib.tls.is_thread_local_variable(thread_arena_via_symbol):
thread_arena_struct_addr = pwndbg.gdblib.memory.u(thread_arena_via_symbol)
# Check &thread_arena is a valid address or not to avoid false positive.
if pwndbg.gdblib.vmmap.find(thread_arena_struct_addr):
@ -1554,7 +1539,6 @@ class HeuristicHeap(GlibcMemoryAllocator):
base_offset + offset
)
break
elif pwndbg.gdblib.arch.current == "arm":
# We need to find something near the first `mrc 15, ......`
# The flow of assembly code will like:
@ -1584,7 +1568,9 @@ class HeuristicHeap(GlibcMemoryAllocator):
)
break
if self._thread_arena_offset:
if self._thread_arena_offset and pwndbg.gdblib.tls.is_thread_local_variable_offset(
self._thread_arena_offset
):
tls_base = pwndbg.gdblib.tls.address
if tls_base:
thread_arena_struct_addr = tls_base + self._thread_arena_offset
@ -1600,6 +1586,9 @@ class HeuristicHeap(GlibcMemoryAllocator):
"""Locate a thread's tcache struct. We try to find its address in Thread Local Storage (TLS) first,
and if that fails, we guess it's at the first chunk of the heap.
"""
if not self.has_tcache():
print(message.warn("This version of GLIBC was not compiled with tcache support."))
return None
thread_cache_via_config = int(str(pwndbg.gdblib.config.tcache), 0)
thread_cache_via_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address(
"tcache"
@ -1608,32 +1597,26 @@ class HeuristicHeap(GlibcMemoryAllocator):
self._thread_cache = self.tcache_perthread_struct(thread_cache_via_config)
return self._thread_cache
elif thread_cache_via_symbol:
if pwndbg.gdblib.symbol.static_linkage_symbol_address("tcache"):
# If the symbol is static-linkage symbol, we trust it.
# Check &tcache is nearby TLS base or not to avoid false positive.
if pwndbg.gdblib.tls.is_thread_local_variable(thread_cache_via_symbol):
thread_cache_struct_addr = pwndbg.gdblib.memory.u(thread_cache_via_symbol)
if pwndbg.gdblib.vmmap.find(thread_cache_struct_addr):
self._thread_cache = self.tcache_perthread_struct(thread_cache_struct_addr)
return self._thread_cache
# Check &tcache is nearby TLS base or not to avoid false positive.
tls_base = pwndbg.gdblib.tls.address
if tls_base:
if pwndbg.gdblib.arch.current in ("x86-64", "i386"):
is_valid_address = 0 < tls_base - thread_cache_via_symbol < 0x250
else: # elif pwndbg.gdblib.arch.current in ("aarch64", "arm"):
is_valid_address = 0 < thread_cache_via_symbol - tls_base < 0x250
is_valid_address = (
is_valid_address
and thread_cache_via_symbol in pwndbg.gdblib.vmmap.find(tls_base)
)
if is_valid_address:
thread_cache_struct_addr = pwndbg.gdblib.memory.u(thread_cache_via_symbol)
# If target is single-threaded, then the tcache struct is at the first chunk of the heap.
# We try to find the address by using mp_.srck_base + 0x10 first since it's more reliable than other methods.
if not self.multithreaded:
try:
thread_cache_struct_addr = self.get_sbrk_heap_region().vaddr + 0x10
if pwndbg.gdblib.vmmap.find(thread_cache_struct_addr):
self._thread_cache = self.tcache_perthread_struct(thread_cache_struct_addr)
return self._thread_cache
except SymbolUnresolvableError:
# mp_ is not available
pass
if self.has_tcache():
# Each thread has a tcache struct, and the address of the tcache struct is stored in the TLS.
# Try to find tcache in TLS, so first we need to find the offset of tcache to TLS base
if not self._thread_cache_offset and pwndbg.gdblib.symbol.address("__libc_malloc"):
# TODO/FIXME: This method should be updated if we find a better way to find the target assembly code
@ -1758,38 +1741,18 @@ class HeuristicHeap(GlibcMemoryAllocator):
reg = ldr_instr.operands[0].str
if instr.mnemonic == "add" and instr.op_str == reg + ", pc":
offset = ldr_instr.operands[1].mem.disp
offset = pwndbg.gdblib.memory.s32(
(ldr_instr.address + 4 & -4) + offset
)
offset = pwndbg.gdblib.memory.s32((ldr_instr.address + 4 & -4) + offset)
self._thread_cache_offset = (
pwndbg.gdblib.memory.s32(instr.address + 4 + offset) + 4
)
break
# Validate the the offset we found
is_offset_valid = False
if pwndbg.gdblib.arch.current in ("x86-64", "i386"):
# The offset to tls should be a negative integer for x86/x64, but it can't be too small
# If it is too small, we find a wrong value
is_offset_valid = (
self._thread_cache_offset and -0x250 < self._thread_cache_offset < 0
)
elif pwndbg.gdblib.arch.current in ("aarch64", "arm"):
# The offset to tls should be a positive integer for aarch64, but it can't be too big
# If it is too big, we find a wrong value
is_offset_valid = (
self._thread_cache_offset and 0 < self._thread_cache_offset < 0x250
)
is_offset_valid = (
is_offset_valid and self._thread_cache_offset % pwndbg.gdblib.arch.ptrsize == 0
)
# If the offset is valid, we add the offset to TLS base to locate the tcache struct
# Note: We do a lot of checks here to make sure the offset and address we found is valid,
# so we can use our fallback if they're invalid
if is_offset_valid:
if self._thread_cache_offset and pwndbg.gdblib.tls.is_thread_local_variable_offset(
self._thread_cache_offset
):
tls_base = pwndbg.gdblib.tls.address
if tls_base:
thread_cache_struct_addr = pwndbg.gdblib.memory.pvoid(
@ -1818,9 +1781,6 @@ class HeuristicHeap(GlibcMemoryAllocator):
return self._thread_cache
print(message.warn("This version of GLIBC was not compiled with tcache support."))
return None
@property
def mp(self):
mp_via_config = int(str(pwndbg.gdblib.config.mp), 0)

@ -1,4 +1,5 @@
import gdb
import pytest
import pwndbg
import pwndbg.gdblib.arch
@ -260,9 +261,13 @@ def test_main_arena_heuristic(start_binary):
pwndbg.heap.current = type(pwndbg.heap.current)() # Reset the heap object of pwndbg
# Level 3: We check we can get the address of `main_arena` by parsing the memory
for _ in range(2):
with mock_for_heuristic(mock_all=True):
# Check the address of `main_arena` is correct
assert pwndbg.heap.current.main_arena.address == main_arena_addr_via_debug_symbol
# Check if it works when there's more than one arena
gdb.execute("continue")
assert gdb.selected_thread().num == 2
def test_mp_heuristic(start_binary):
@ -328,12 +333,18 @@ def test_global_max_fast_heuristic(start_binary):
assert pwndbg.heap.current._global_max_fast_addr == global_max_fast_addr_via_debug_symbol
def test_thread_cache_heuristic(start_binary):
@pytest.mark.parametrize(
"is_multi_threaded", [False, True], ids=["single-threaded", "multi-threaded"]
)
def test_thread_cache_heuristic(start_binary, is_multi_threaded):
# TODO: Support other architectures or different libc versions
start_binary(HEAP_MALLOC_CHUNK)
gdb.execute("set resolve-heap-via-heuristic force")
gdb.execute("break break_here")
gdb.execute("continue")
if is_multi_threaded:
gdb.execute("continue")
assert gdb.selected_thread().num == 2
# Use the debug symbol to find the address of `thread_cache`
tcache_addr_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address(
@ -363,12 +374,18 @@ def test_thread_cache_heuristic(start_binary):
assert pwndbg.heap.current.thread_cache.address == thread_cache_addr_via_debug_symbol
def test_thread_arena_heuristic(start_binary):
@pytest.mark.parametrize(
"is_multi_threaded", [False, True], ids=["single-threaded", "multi-threaded"]
)
def test_thread_arena_heuristic(start_binary, is_multi_threaded):
# TODO: Support other architectures or different libc versions
start_binary(HEAP_MALLOC_CHUNK)
gdb.execute("set resolve-heap-via-heuristic force")
gdb.execute("break break_here")
gdb.execute("continue")
if is_multi_threaded:
gdb.execute("continue")
assert gdb.selected_thread().num == 2
# Use the debug symbol to find the value of `thread_arena`
thread_arena_via_debug_symbol = pwndbg.gdblib.symbol.static_linkage_symbol_address(
@ -392,12 +409,18 @@ def test_thread_arena_heuristic(start_binary):
assert pwndbg.heap.current.thread_arena.address == thread_arena_via_debug_symbol
def test_heuristic_fail_gracefully(start_binary):
@pytest.mark.parametrize(
"is_multi_threaded", [False, True], ids=["single-threaded", "multi-threaded"]
)
def test_heuristic_fail_gracefully(start_binary, is_multi_threaded):
# TODO: Support other architectures or different libc versions
start_binary(HEAP_MALLOC_CHUNK)
gdb.execute("set resolve-heap-via-heuristic force")
gdb.execute("break break_here")
gdb.execute("continue")
if is_multi_threaded:
gdb.execute("continue")
assert gdb.selected_thread().num == 2
def _test_heuristic_fail_gracefully(name):
try:

Loading…
Cancel
Save