Profiling and performance improvements (#421)

* Add scripts for benchmarking and profiling pwndbg commands

* Fix performance issue in emulator.py

The register-to-Unicorn-enum lookup was really inefficient. Replaced it with
parsing of the Unicorn `consts` modules at initialization time, leaving only a
dict lookup on the hot path.
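
Roughly, the pattern is the one sketched below (a minimal illustration for x86 only,
assuming the `unicorn` Python bindings; the `x86_consts`/`x86_regs` names are just for
the example, the real change lives in the emulator.py hunks further down):

```python
import re
import unicorn as U  # Unicorn Engine Python bindings

# Done once: repack the "consts" module (one module-level variable per
# constant) into a plain dict.
def parse_consts(u_consts):
    return {name: getattr(u_consts, name)
            for name in dir(u_consts)
            if name.startswith('UC_')}

x86_consts = parse_consts(U.x86_const)

# Also done once: keep only the register constants, keyed by bare register name.
reg_pattern = re.compile(r'^UC_.*_REG_(.*)$')
x86_regs = {}
for name, value in x86_consts.items():
    m = reg_pattern.match(name)
    if m:
        x86_regs[m.group(1)] = value

# Hot path: a single dict lookup instead of scanning dir(u_consts) per register.
assert x86_regs['RAX'] == U.x86_const.UC_X86_REG_RAX
```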

* Fix performance issue in syntax_highlight.

The current code re-created the pygments formatter and lexer on every
syntax_highlight() call, which turned out to take a noticeable amount of time.
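
The fix follows the usual cache-at-module-level pattern, roughly as sketched below
(a stripped-down illustration, not the exact pwndbg code; the real version also
handles the pwntools assembly lexer and a config trigger for style changes):

```python
import pygments
import pygments.formatters
import pygments.lexers
import pygments.util

# Built once at import time instead of on every call.
formatter = pygments.formatters.Terminal256Formatter(style='monokai')
lexer_cache = {}

def syntax_highlight(code, filename='.asm'):
    lexer = lexer_cache.get(filename)
    if lexer is None:
        try:
            lexer = pygments.lexers.guess_lexer_for_filename(filename, code, stripnl=False)
        except pygments.util.ClassNotFound:
            return code  # no lexer known for this filename
        lexer_cache[filename] = lexer
    return pygments.highlight(code, lexer, formatter).rstrip()
```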

* Minor performance improvements in syntax_highlight

* Memoize IDA availability.

Not sure if this is a valid solution; I have never used pwndbg with IDA.
However, we should not try to connect to IDA on each context() call, as this
takes 25% of the current execution time.
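
The effect of memoizing is sketched below with a plain dict cache (pwndbg's own
`@pwndbg.memoize.reset_on_objfile` works similarly but also resets whenever the
debugged objfile changes; `available()` here is a dummy stand-in, not the real
IDA check):

```python
import functools

def memoize(func):
    """Simplified stand-in for pwndbg.memoize.reset_on_objfile."""
    cache = {}
    @functools.wraps(func)
    def wrapper(*args):
        if args not in cache:
            cache[args] = func(*args)
        return cache[args]
    wrapper.cache = cache  # pwndbg clears its cache when the objfile changes
    return wrapper

calls = 0

@memoize
def available():
    # Stands in for the expensive "can we reach IDA Pro?" connection attempt.
    global calls
    calls += 1
    return True

available(); available(); available()
assert calls == 1  # the connection attempt now runs once, not on every context()
```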

* Explicitly source gdbinit in benchmark scripts.
Authored by Maciej Dębski, committed by Disconnect3d
parent 8ecaa67043
commit a3da3f0daa

@ -0,0 +1,3 @@
test
stats
stats.log

@ -0,0 +1,13 @@
#!/bin/bash
# Benchmark context command
make test > /dev/null
git log --abbrev-commit --pretty=oneline HEAD^..HEAD
gdb ./test \
-ex "source ../gdbinit.py" \
-ex "b main" -ex "r" \
-ex "python import timeit; print(' 1ST RUN:', timeit.repeat('pwndbg.commands.context.context()', repeat=1, number=1, globals=globals())[0])" \
-ex "si" \
-ex "python import timeit; print(' 2ND RUN:', timeit.repeat('pwndbg.commands.context.context()', repeat=1, number=1, globals=globals())[0])" \
-ex "si" \
-ex "python import timeit; print('MULTIPLE RUNS:', timeit.repeat('pwndbg.commands.context.context()', repeat=1, number=10, globals=globals())[0] / 10)" \
-ex "quit" | grep 'RUNS*:'

@ -0,0 +1,18 @@
#!/bin/bash
# Quick and dirty script to profile pwndbg using cProfile.
make test > /dev/null
git log --abbrev-commit --pretty=oneline HEAD^..HEAD
# To profile first run, remove -ex "context".
gdb ./test \
-ex "source ../gdbinit.py" \
-ex "b main" -ex "r" \
-ex "context" \
-ex "python import cProfile; cProfile.run('pwndbg.commands.context.context()', 'stats')" \
-ex "quit"
python3 -c "
import pstats
p = pstats.Stats('stats')
p.strip_dirs().sort_stats('tottime').print_stats(20)
"
[ -x /usr/local/bin/pyprof2calltree ] && command -v kcachegrind >/dev/null 2>&1 && /usr/local/bin/pyprof2calltree -k -i stats

@ -0,0 +1,3 @@
int main() {
    while(1);
}

@ -21,8 +21,14 @@ except ImportError:
pwndbg.config.Parameter('syntax-highlight', True, 'Source code / assembly syntax highlight')
style = theme.Parameter('syntax-highlight-style', 'monokai', 'Source code / assembly syntax highlight stylename of pygments module')
formatter = pygments.formatters.Terminal256Formatter(style=str(style))
pwntools_lexer = PwntoolsLexer()
lexer_cache = {}
@pwndbg.config.Trigger([style])
def check_style():
    global formatter
    try:
        formatter = pygments.formatters.Terminal256Formatter(
            style=str(style)
@ -40,20 +46,17 @@ def syntax_highlight(code, filename='.asm'):
    filename = os.path.basename(filename)
    formatter = pygments.formatters.Terminal256Formatter(
        style=str(style)
    )
    lexer = None
    lexer = lexer_cache.get(filename, None)
    # If source code is asm, use our customized lexer.
    # Note: We can not register our Lexer to pygments and use their APIs,
    # since the pygment only search the lexers installed via setuptools.
    for glob_pat in PwntoolsLexer.filenames:
        pat = '^' + glob_pat.replace('.', r'\.').replace('*', r'.*') + '$'
        if re.match(pat, filename):
            lexer = PwntoolsLexer()
            break
    if not lexer:
        for glob_pat in PwntoolsLexer.filenames:
            pat = '^' + glob_pat.replace('.', r'\.').replace('*', r'.*') + '$'
            if re.match(pat, filename):
                lexer = pwntools_lexer
                break
    if not lexer:
        try:
@ -63,6 +66,7 @@ def syntax_highlight(code, filename='.asm'):
            pass
    if lexer:
        lexer_cache[filename] = lexer
        code = pygments.highlight(code, lexer, formatter).rstrip()
    return code

@ -10,6 +10,7 @@ from __future__ import unicode_literals
import binascii
import inspect
import re
import capstone as C
import gdb
@ -21,6 +22,19 @@ import pwndbg.emu.emulator
import pwndbg.memory
import pwndbg.regs
def parse_consts(u_consts):
"""
Unicorn "consts" is a python module consisting of a variable definition
for each known entity. We repack it here as a dict for performance.
"""
consts = {}
for name in dir(u_consts):
if name.startswith('UC_'):
consts[name] = getattr(u_consts, name)
return consts
# Map our internal architecture names onto Unicorn Engine's architecture types.
arch_to_UC = {
    'i386': U.UC_ARCH_X86,
@ -33,12 +47,12 @@ arch_to_UC = {
}
arch_to_UC_consts = {
    'i386': U.x86_const,
    'x86-64': U.x86_const,
    'mips': U.mips_const,
    'sparc': U.sparc_const,
    'arm': U.arm_const,
    'aarch64': U.arm64_const,
    'i386': parse_consts(U.x86_const),
    'x86-64': parse_consts(U.x86_const),
    'mips': parse_consts(U.mips_const),
    'sparc': parse_consts(U.sparc_const),
    'arm': parse_consts(U.arm_const),
    'aarch64': parse_consts(U.arm64_const),
}
# Map our internal architecture names onto Unicorn Engine's architecture types.
@ -98,6 +112,7 @@ e = pwndbg.emu.emulator.Emulator()
e.until_jump()
'''
class Emulator(object):
    def __init__(self):
        self.arch = pwndbg.arch.current
@ -107,6 +122,14 @@ class Emulator(object):
        self.consts = arch_to_UC_consts[self.arch]
        # Just registers, for faster lookup
        self.const_regs = {}
        r = re.compile(r'^UC_.*_REG_(.*)$')
        for k,v in self.consts.items():
            m = r.match(k)
            if m:
                self.const_regs[m.group(1)] = v
        self.uc_mode = self.get_uc_mode()
        debug("# Instantiating Unicorn for %s" % self.arch)
        debug("uc = U.Uc(%r, %r)" % (arch_to_UC[self.arch], self.uc_mode))
@ -261,8 +284,9 @@ class Emulator(object):
        # 'eax' ==> enum
        #
        if reg in self.regs.all:
            for reg_enum in (c for c in dir(self.consts) if c.endswith('_' + reg.upper())):
                return getattr(self.consts, reg_enum)
            e = self.const_regs.get(reg.upper(), None)
            if e is not None:
                return e
        # If we're looking for an abstract register which *is* accounted for,
        # we can also do an indirect lookup.

@ -120,6 +120,7 @@ def returns_address(function):
    return wrapper

@pwndbg.memoize.reset_on_objfile
@withIDA
def available():
    return True
