From 5849d27446f50280ed1ea81b98c40e0faff2bf06 Mon Sep 17 00:00:00 2001 From: Andrej Zieger Date: Wed, 18 Mar 2020 17:49:56 +0100 Subject: [PATCH] [WIP] Feature: show ghidra decompiled code in context (#715) * Feature: show ghidra decompiled code in context With the help of radare2, r2pipe and r2ghidra-dec * Update FEATURES.md * Include review comments * Removed embarrassing auto added guards Co-authored-by: Disconnect3d --- FEATURES.md | 26 ++++++++++ pwndbg/commands/context.py | 102 +++++++++++++++++++++++++++++++++++-- 2 files changed, 125 insertions(+), 3 deletions(-) diff --git a/FEATURES.md b/FEATURES.md index 57b5fcf8b..be3437c05 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -89,6 +89,32 @@ contextwatch exectue "ds BUF" cwatch execute "x/20x $rsp" ``` +### Ghidra + +With the help of [radare2](https://github.com/radareorg/radare2) it is possible to show the +decompiled source code of the ghidra decompiler. + +However, this comes with some prerequisites. +* First: you have to have installed radare2 and it must be found by gdb (within path) +* Second: you have to install the ghidra plugin for radare2 + [r2ghidra-dec](https://github.com/radareorg/r2ghidra-dec) +* Third: r2pipe has to be installed in the python-context gdb is using + +The decompiled source be shown as part of the context by adding `ghidra` to `set context-sections` +or by calling `ctx-ghidra [function]` manually. + +Be warned, the first call to radare2/r2ghidra-dec is rather slow! Subsequent requests for decompiled +source will be faster. And it does take up some resources as the radare2 instance is kept by r2pipe +to enable faster subsequent analysis. + +With those performance penalties it is resonable to not have it launch always. Therefore it includes +an option to only start it when required with `set context-ghidra`: +* `set context-ghidra always`: always trigger the ghidra context +* `set context-ghidra never`: never trigger the ghidra context except when called manually +* `set context-ghidra if-no-source`: invoke ghidra if no source code is available + +Remark: the plugin tries to guess the correct current line and mark it with "-->", but it might +get it wrong. ## Disassembly diff --git a/pwndbg/commands/context.py b/pwndbg/commands/context.py index 93376d02f..3853fa7fd 100644 --- a/pwndbg/commands/context.py +++ b/pwndbg/commands/context.py @@ -9,6 +9,8 @@ import argparse import ast import codecs import ctypes +import json +import os import sys from collections import defaultdict from io import open @@ -47,7 +49,7 @@ def clear_screen(out=sys.stdout): config_clear_screen = pwndbg.config.Parameter('context-clear-screen', False, 'whether to clear the screen before printing the context') config_output = pwndbg.config.Parameter('context-output', 'stdout', 'where pwndbg should output ("stdout" or file/tty).') config_context_sections = pwndbg.config.Parameter('context-sections', - 'regs disasm code stack backtrace expressions', + 'regs disasm code ghidra stack backtrace expressions', 'which context sections are displayed (controls order)') # Storing output configuration per section @@ -208,6 +210,99 @@ def context_expressions(target=sys.stdout, with_banner=True, width=None): output.extend(lines) return banner + output if with_banner else output +# Ghidra integration through radare2 +# This is not (sorry couldn't help it) "the yellow of the egg" +# Using radare2 is suboptimal and will only be an intermediate step to have +# ghidra decompiler within pwndbg. +# But having ghidra in pwndgb is a lot more work as it requires quite some code and c/c++ +radare2 = {} + +config_context_ghidra = pwndbg.config.Parameter('context-ghidra', + 'never', + 'if or/when to try to decompile with ' + 'ghidra (slow and requires radare2/r2pipe) ' + '(valid values: always, never, if-no-source)') +def init_radare2(filename): + r2 = radare2.get(filename) + if r2: + return r2 + import r2pipe + r2 = r2pipe.open(filename) + radare2[filename] = r2 + r2.cmd("aaaa") + return r2 + +parser = argparse.ArgumentParser() +parser.description = """Show current function decompiled by ghidra""" +parser.add_argument("func", type=str, default=None, nargs="?", + help="Function to be shown. Defaults to current") +@pwndbg.commands.ArgparsedCommand(parser, aliases=['ctx-ghidra']) +def contextghidra(func): + print("\n".join(context_ghidra(func, with_banner=False, force_show=True))) + +def context_ghidra(func=None, target=sys.stdout, with_banner=True, width=None, force_show=False): + banner = [pwndbg.ui.banner("ghidra decompile", target=target, width=width)] + + if config_context_ghidra == "never" and not force_show: + return [] + elif config_context_ghidra == "if-no-source" and not force_show: + try: + with open(gdb.selected_frame().find_sal().symtab.fullname()) as _: + pass + except: # a lot can go wrong in search of source code. + return [] # we don't care what, just that it did not work out well... + + filename = gdb.current_progspace().filename + try: + r2 = init_radare2(filename) + # LD list supported decompilers (e cmd.pdc=?) + # Outputs for example:: pdc\npdg + if not "pdg" in r2.cmd("LD").split("\n"): + return banner + ["radare2 plugin r2ghidra-dec must be installed and available from r2"] + except ImportError: # no r2pipe present + return banner + ["r2pipe not available, but required for r2->ghidra-bridge"] + if func is None: + try: + func = hex(pwndbg.regs[pwndbg.regs.current.pc]) + except: + func = "main" + src = r2.cmdj("pdgj @" + func) + source = src.get("code", "") + curline = None + try: + cur = pwndbg.regs[pwndbg.regs.current.pc] + except AttributeError: + cur = None # If not running there is no current.pc + if cur is not None: + closest = 0 + for off in (a.get("offset", 0) for a in src.get("annotations", [])): + if abs(cur - closest) > abs(cur - off): + closest = off + pos_annotations = sorted([a for a in src.get("annotations", []) if a.get("offset") == closest], + key=lambda a: a["start"]) + if pos_annotations: + curline = source.count("\n", 0, pos_annotations[0]["start"]) + source = source.split("\n") + # Append --> for the current line if possible + if curline is not None: + line = source[curline] + if line.startswith(' '): + line = line[4:] + source[curline] = '--> ' + line + # Join the source for highlighting + source = "\n".join(source) + if pwndbg.config.syntax_highlight: + # highlighting depends on the file extension to guess the language, so try to get one... + try: # try to read the source filename from debug information + src_filename = gdb.selected_frame().find_sal().symtab.fullname() + except: # if non, take the original filename and maybe append .c (just assuming is was c) + src_filename = filename+".c" if os.path.basename(filename).find(".") < 0 else filename + source = H.syntax_highlight(source, src_filename) + source = source.split("\n") + return banner + source if with_banner else source + + + # @pwndbg.events.stop parser = argparse.ArgumentParser() @@ -245,10 +340,10 @@ def context(subcontext=None): with_banner=settings.get("banner_top", True))) for target, res in result.items(): - settings = result_settings[target] + settings = result_settings[target] if len(res) > 0 and settings.get("banner_bottom", True): with target as out: - res.append(pwndbg.ui.banner("", target=out, + res.append(pwndbg.ui.banner("", target=out, width=settings.get("width", None))) for target, lines in result.items(): @@ -558,6 +653,7 @@ context_sections = { 's': context_stack, 'b': context_backtrace, 'e': context_expressions, + 'g': context_ghidra, }