From f1d1f7cd560e144a45e1859ed7c1f6824f222602 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Stig=20K=C3=A4mpf=20Svane?= <jonas@damgaardsvej.dk>
Date: Thu, 14 Jun 2018 20:39:02 +0200
Subject: [PATCH] Fix and enhance xinfo command (#480)

* Instead of unstable parsing of readelf output, use the elftools ELF wrapper for parsing PT_LOAD segments

* Fix #434: xinfo command doesn't show File (Disk) info on non-PIE binaries

Also remove some trailing whitespace

Also fix another bug in xinfo; now it can show the disk offset of all
mmap files, not just the primary executable

* New xinfo feature: Print containing ELF sections for file-backed addresses

* Only print header for ELF sections if at least 1 section contains the address

* Fix bug in section offset calculation when printing containing ELF sections

* Refactor ELF file parsing helpers for cleaner separation of ELF metadata parsing and enrichment from a specific use scenario (getting a list of segments/sections containing a given virtual addr). This also makes caching of parse results easier to implement

Adjust xinfo command to these API changes

* Fix bug: Reference mem_end instead of file_end

* Don't use underscore variable names; change decorator to reset_on_objfile

* Update xinfo.py
---
 pwndbg/commands/xinfo.py   |  37 ++++++++++---
 pwndbg/elf.py              | 105 +++++++++++++++++++++++++++++++++++++
 pwndbg/wrappers/readelf.py |  52 ------------------
 3 files changed, 134 insertions(+), 60 deletions(-)

diff --git a/pwndbg/commands/xinfo.py b/pwndbg/commands/xinfo.py
index 6dea5045f..2533655ec 100644
--- a/pwndbg/commands/xinfo.py
+++ b/pwndbg/commands/xinfo.py
@@ -21,13 +21,13 @@ import pwndbg.vmmap
 import pwndbg.wrappers
 
 parser = argparse.ArgumentParser(description='Shows offsets of the specified address to useful other locations')
-parser.add_argument('address', nargs='?', default='$pc', 
+parser.add_argument('address', nargs='?', default='$pc',
                     help='Address to inspect')
 
 def print_line(name, addr, first, second, op, width = 20):
 
     print("{} {} = {} {} {:#x}".format(name.rjust(width), M.get(addr),
-        M.get(first) if type(first) is not str else first.ljust(len(hex(addr))),
+        M.get(first) if not isinstance(first, str) else first.ljust(len(hex(addr).rstrip('L'))),
         op, second,))
 
 def xinfo_stack(page, addr):
@@ -63,17 +63,38 @@ def xinfo_mmap_file(page, addr):
     file_name = page.objfile
     objpages = filter(lambda p: p.objfile == file_name, pwndbg.vmmap.get())
     first = sorted(objpages, key = lambda p: p.vaddr)[0]
+
+    # print offset from ELF base load address
     rva = addr - first.vaddr
+    print_line("File (Base)", addr, first.vaddr, rva, "+")
+
+    # find possible LOAD segments that designate memory and file backings
+    containing_loads = [seg for seg in pwndbg.elf.get_containing_segments(file_name, first.vaddr, addr)
+                        if seg['p_type'] == 'PT_LOAD']
+
+    for segment in containing_loads:
+        if segment['p_type'] == 'PT_LOAD' and addr < segment['x_vaddr_mem_end']:
+            offset = addr - segment['p_vaddr']
+            print_line('File (Segment)', addr, segment['p_vaddr'], offset, '+')
+            break
+
+    for segment in containing_loads:
+        if segment['p_type'] == 'PT_LOAD' and addr < segment['x_vaddr_file_end']:
+            file_offset = segment['p_offset'] + (addr - segment['p_vaddr'])
+            print_line("File (Disk)", addr, file_name, file_offset, "+")
+            break
+    else:
+        print('{} {} = [not file backed]'.format('File (Disk)'.rjust(20), M.get(addr)))
 
-    print_line("File (Memory)", addr, first.vaddr, rva, "+")
+    containing_sections = pwndbg.elf.get_containing_sections(file_name, first.vaddr, addr)
+    if len(containing_sections) > 0:
+        print('\n Containing ELF sections:')
+        for sec in containing_sections:
+            print_line(sec['x_name'], addr, sec['sh_addr'], addr - sec['sh_addr'], '+')
 
-    for segment in pwndbg.wrappers.readelf.get_load_segment_info():
-        if rva >= segment["VirtAddr"] and rva <= segment["VirtAddr"] + segment["MemSiz"]:
-            print_line("File (Disk)", addr, file_name, rva - (segment["VirtAddr"] - segment["Offset"]), "+")
 
 def xinfo_default(page, addr):
     # Just print the distance to the beginning of the mapping
-
     print_line("Mapped Area", addr, page.vaddr, addr - page.vaddr, "+")
 
 
@@ -100,6 +121,6 @@ def xinfo(address=None):
         xinfo_stack(page, addr)
     else:
         xinfo_default(page, addr)
-    
+
     if page.is_memory_mapped_file:
         xinfo_mmap_file(page, addr)
diff --git a/pwndbg/elf.py b/pwndbg/elf.py
index bc7f76644..9fac6985b 100644
--- a/pwndbg/elf.py
+++ b/pwndbg/elf.py
@@ -14,8 +14,11 @@ from __future__ import unicode_literals
 
 import ctypes
 import sys
+from collections import namedtuple
 
 import gdb
+from elftools.elf.constants import SH_FLAGS
+from elftools.elf.elffile import ELFFile
 from six.moves import reload_module
 
 import pwndbg.abi
@@ -37,6 +40,19 @@ ET_EXEC, ET_DYN  = 2,3
 module = sys.modules[__name__]
 
 
+class ELFInfo(namedtuple('ELFInfo', 'header sections segments')):
+    """
+    ELF metadata and structures.
+    """
+    @property
+    def is_pic(self):
+        return self.header['e_type'] == 'ET_DYN'
+
+    @property
+    def is_pie(self):
+        return self.is_pic
+
+
 @pwndbg.events.start
 @pwndbg.events.new_objfile
 def update():
@@ -68,6 +84,95 @@ def read(typ, address, blob=None):
     return obj
 
 
+@pwndbg.memoize.reset_on_objfile
+def get_elf_info(filepath):
+    """
+    Parse and return ELFInfo.
+
+    Adds various calculated properties to the ELF header, segments and sections.
+    Such added properties are those with prefix 'x_' in the returned dicts.
+    """
+    local_path = pwndbg.file.get_file(filepath)
+    with open(local_path, 'rb') as f:
+        elffile = ELFFile(f)
+        header = dict(elffile.header)
+        segments = []
+        for seg in elffile.iter_segments():
+            s = dict(seg.header)
+            s['x_perms'] = [
+                mnemonic for mask, mnemonic in [(PF_R, 'read'), (PF_W, 'write'), (PF_X, 'execute')]
+                if s['p_flags'] & mask != 0
+            ]
+            # end of memory backing
+            s['x_vaddr_mem_end'] = s['p_vaddr'] + s['p_memsz']
+            # end of file backing
+            s['x_vaddr_file_end'] = s['p_vaddr'] + s['p_filesz']
+            segments.append(s)
+        sections = []
+        for sec in elffile.iter_sections():
+            s = dict(sec.header)
+            s['x_name'] = sec.name
+            s['x_addr_mem_end'] = s['x_addr_file_end'] = s['sh_addr'] + s['sh_size']
+            sections.append(s)
+        return ELFInfo(header, sections, segments)
+
+
+@pwndbg.memoize.reset_on_objfile
+def get_elf_info_rebased(filepath, vaddr):
+    """
+    Parse and return ELFInfo with all virtual addresses rebased to vaddr
+    """
+    raw_info = get_elf_info(filepath)
+    # silently ignores "wrong" vaddr supplied for non-PIE ELF
+    load = vaddr if raw_info.is_pic else 0
+    headers = dict(raw_info.header)
+    headers['e_entry'] += load
+
+    segments = []
+    for seg in raw_info.segments:
+        s = dict(seg)
+        for vaddr_attr in ['p_vaddr', 'x_vaddr_mem_end', 'x_vaddr_file_end']:
+            s[vaddr_attr] += load
+        segments.append(s)
+
+    sections = []
+    for sec in raw_info.sections:
+        s = dict(sec)
+        for vaddr_attr in ['sh_addr', 'x_addr_mem_end', 'x_addr_file_end']:
+            s[vaddr_attr] += load
+        sections.append(s)
+
+    return ELFInfo(headers, sections, segments)
+
+
+def get_containing_segments(elf_filepath, elf_loadaddr, vaddr):
+    elf = get_elf_info_rebased(elf_filepath, elf_loadaddr)
+    segments = []
+    for seg in elf.segments:
+        # disregard non-LOAD segments that are not file-backed (typically STACK)
+        if 'LOAD' not in seg['p_type'] and seg['p_filesz'] == 0:
+            continue
+        # disregard segments not containing vaddr
+        if vaddr < seg['p_vaddr'] or vaddr >= seg['x_vaddr_mem_end']:
+            continue
+        segments.append(dict(seg))
+    return segments
+
+
+def get_containing_sections(elf_filepath, elf_loadaddr, vaddr):
+    elf = get_elf_info_rebased(elf_filepath, elf_loadaddr)
+    sections = []
+    for sec in elf.sections:
+        # disregard sections not occupying memory
+        if sec['sh_flags'] & SH_FLAGS.SHF_ALLOC == 0:
+            continue
+        # disregard sections that do not contain vaddr
+        if vaddr < sec['sh_addr'] or vaddr >= sec['x_addr_mem_end']:
+            continue
+        sections.append(dict(sec))
+    return sections
+
+
 @pwndbg.proc.OnlyWhenRunning
 @pwndbg.memoize.reset_on_start
 def exe():
diff --git a/pwndbg/wrappers/readelf.py b/pwndbg/wrappers/readelf.py
index c5a0d4fd7..343c7d2a1 100644
--- a/pwndbg/wrappers/readelf.py
+++ b/pwndbg/wrappers/readelf.py
@@ -34,55 +34,3 @@ def _extract_jumps(line):
             return False
     except IndexError:
         return False
-
-@pwndbg.wrappers.OnlyWithCommand(cmd_name)
-def get_load_segment_info():
-    '''
-    Looks for LOAD sections by parsing the output of `readelf --program-headers <binary>`
-    '''
-    local_path = pwndbg.file.get_file(pwndbg.proc.exe)
-    cmd = [get_jmpslots.cmd_path, "--program-headers", local_path]
-    readelf_out = pwndbg.wrappers.call_cmd(cmd)
-
-    segments = []
-    load_found = False
-
-    # Output from readelf is 
-    # Type           Offset             VirtAddr           PhysAddr
-    #                FileSiz            MemSiz             Flags  Align
-    # LOAD           0x0000000000000000 0x0000000000000000 0x0000000000000000
-    #                0x0000000000000830 0x0000000000000830  R E    0x200000
-    #
-    ############################################################################
-    #
-    # NOTE: On some readelf versions the Align column might not be prefixed with 0x
-    # See https://github.com/pwndbg/pwndbg/issues/427
-    #
-    # Account for this using two regular expressions
-    re_first = re.compile(r"\s+LOAD\s+(0x[0-9A-Fa-f]+) (0x[0-9A-Fa-f]+) (0x[0-9A-Fa-f]+)")
-    re_secnd = re.compile(r"\s+(0x[0-9A-Fa-f]+) (0x[0-9A-Fa-f]+)  (.)(.)(.)\s+(0x)?([0-9A-Fa-f]+)")
-    hex2int = lambda x: int(x, 16)
-
-    for line in readelf_out.splitlines():
-        if "LOAD" in line:
-            load_found = True
-            offset, vaddr, paddr = map(hex2int, re_first.match(line).groups())
-        elif load_found:
-            fsize, msize, read, write, execute, _optional_prefix, align = re_secnd.match(line).groups()
-            fsize, msize, align = map(hex2int, (fsize, msize, '0x' + align))
-            read = read == "R"
-            write = write == "W"
-            execute = execute == "E"
-
-            segments.append({"Offset":   offset,
-                             "VirtAddr": vaddr,
-                             "PhysAddr": paddr,
-                             "FileSiz": fsize,
-                             "MemSiz": msize,
-                             "FlagsRead": read,
-                             "FlagsWrite": write,
-                             "FlagsExecute": execute})
-
-            load_found = False
-
-    return segments