From 5c3d1d3560ab2b19b5dc835dffbae84c4e86b31e Mon Sep 17 00:00:00 2001 From: OBarronCS <55004530+OBarronCS@users.noreply.github.com> Date: Thu, 13 Feb 2025 03:40:57 -0800 Subject: [PATCH] Properly decode binary data sent via GDB commands (#2730) --- pwndbg/aglib/file.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/pwndbg/aglib/file.py b/pwndbg/aglib/file.py index 362a051b5..fdf2e966e 100644 --- a/pwndbg/aglib/file.py +++ b/pwndbg/aglib/file.py @@ -185,7 +185,7 @@ def vfile_readlink(pathname: str | bytes) -> bytes: parts = response[1:].split(b";", 1) # bytes_read = int(parts[0], 16) - target = parts[1] + target = gdb_memtox_inverse(parts[1]) return target @@ -234,6 +234,27 @@ def vfile_open(filename: str, flags: int, mode: int) -> int: return file_descriptor +# GDB defines the encoding of binary data transferred through commands here: +# - https://sourceware.org/gdb/current/onlinedocs/gdb.html/Overview.html#:~:text=The%20binary%20data%20representation%20uses%207d +# This function unescapes these character +# Example of QEMU function that does this escaping: +# https://github.com/qemu/qemu/blob/de278e54aefed143526174335f8286f7437d20be/gdbstub/gdbstub.c#L184 +def gdb_memtox_inverse(data: bytes) -> bytes: + buffer = bytearray() + + i = 0 + while i < len(data): + b = data[i] + if b == 125: # == ord("}"): + buffer.append(data[i + 1] ^ 0x20) + i += 1 + else: + buffer.append(b) + i += 1 + + return buffer + + def vfile_pread(fd: int, size: int, offset: int) -> Tuple[int, bytes]: """ Reads data from a file descriptor. @@ -249,7 +270,9 @@ def vfile_pread(fd: int, size: int, offset: int) -> Tuple[int, bytes]: parts = response[1:].split(b";", 1) bytes_read = int(parts[0].decode(), 16) - data = parts[1] + # We have to decode the data, because some bytes - #,$,*,} - have special encodings + data = gdb_memtox_inverse(parts[1]) + return bytes_read, data