From a3da3f0daa03bc71384914572ab4ccd0dc5a42b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20D=C4=99bski?= Date: Sat, 24 Feb 2018 16:45:14 +0100 Subject: [PATCH] Profiling and performance improvements (#421) * Add scripts for benchmarking and profiling pwndbg commands * Fix performance issue in emulator.py Register to unicorn enum lookup was really inefficient. Replaced with parsing (consts) at initialization time, and only a dict lookup on the hot path. * Fix performance issue in syntax_highlight. Current code initialized pygments on each syntax_highlight(), which apparently took some time. * Minor performance improvements in syntax_highlight * Memoize IDA availability. Not sure if this is a valid solution, I have never used pwndbg with IDA. However, we should not try to connect to IDA on each context(), as this takes 25% of current exec time. * Explicitly source gdbinit in benchmark scripts. --- profiling/.gitignore | 3 +++ profiling/benchmark.sh | 13 +++++++++++ profiling/profile.sh | 18 ++++++++++++++ profiling/test.c | 3 +++ pwndbg/color/syntax_highlight.py | 24 +++++++++++-------- pwndbg/emu/emulator.py | 40 +++++++++++++++++++++++++------- pwndbg/ida.py | 1 + 7 files changed, 84 insertions(+), 18 deletions(-) create mode 100644 profiling/.gitignore create mode 100755 profiling/benchmark.sh create mode 100755 profiling/profile.sh create mode 100644 profiling/test.c diff --git a/profiling/.gitignore b/profiling/.gitignore new file mode 100644 index 000000000..84bc10df2 --- /dev/null +++ b/profiling/.gitignore @@ -0,0 +1,3 @@ +test +stats +stats.log diff --git a/profiling/benchmark.sh b/profiling/benchmark.sh new file mode 100755 index 000000000..61301b660 --- /dev/null +++ b/profiling/benchmark.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Benchmark context command +make test > /dev/null +git log --abbrev-commit --pretty=oneline HEAD^..HEAD +gdb ./test \ + -ex "source ../gdbinit.py" \ + -ex "b main" -ex "r" \ + -ex "python import timeit; print(' 1ST RUN:', 
timeit.repeat('pwndbg.commands.context.context()', repeat=1, number=1, globals=globals())[0])" \ + -ex "si" \ + -ex "python import timeit; print(' 2ND RUN:', timeit.repeat('pwndbg.commands.context.context()', repeat=1, number=1, globals=globals())[0])" \ + -ex "si" \ + -ex "python import timeit; print('MULTIPLE RUNS:', timeit.repeat('pwndbg.commands.context.context()', repeat=1, number=10, globals=globals())[0] / 10)" \ + -ex "quit" | grep 'RUNS*:' diff --git a/profiling/profile.sh b/profiling/profile.sh new file mode 100755 index 000000000..80fb7a1bc --- /dev/null +++ b/profiling/profile.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Quick and dirty script to profile pwndbg using cProfile. +make test > /dev/null +git log --abbrev-commit --pretty=oneline HEAD^..HEAD +# To profile first run, remove -ex "context". +gdb ./test \ + -ex "source ../gdbinit.py" \ + -ex "b main" -ex "r" \ + -ex "context" \ + -ex "python import cProfile; cProfile.run('pwndbg.commands.context.context()', 'stats')" \ + -ex "quit" + +python3 -c " +import pstats +p = pstats.Stats('stats') +p.strip_dirs().sort_stats('tottime').print_stats(20) +" +[ -x /usr/local/bin/pyprof2calltree ] && command -v kcachegrind >/dev/null 2>&1 && /usr/local/bin/pyprof2calltree -k -i stats diff --git a/profiling/test.c b/profiling/test.c new file mode 100644 index 000000000..3d5939e31 --- /dev/null +++ b/profiling/test.c @@ -0,0 +1,3 @@ +int main() { + while(1); +} diff --git a/pwndbg/color/syntax_highlight.py b/pwndbg/color/syntax_highlight.py index f5cd7dbf7..c3e6383b3 100644 --- a/pwndbg/color/syntax_highlight.py +++ b/pwndbg/color/syntax_highlight.py @@ -21,8 +21,14 @@ except ImportError: pwndbg.config.Parameter('syntax-highlight', True, 'Source code / assembly syntax highlight') style = theme.Parameter('syntax-highlight-style', 'monokai', 'Source code / assembly syntax highlight stylename of pygments module') + +formatter = pygments.formatters.Terminal256Formatter(style=str(style)) +pwntools_lexer = PwntoolsLexer() 
+lexer_cache = {} + @pwndbg.config.Trigger([style]) def check_style(): + global formatter try: formatter = pygments.formatters.Terminal256Formatter( style=str(style) @@ -40,20 +46,17 @@ def syntax_highlight(code, filename='.asm'): filename = os.path.basename(filename) - formatter = pygments.formatters.Terminal256Formatter( - style=str(style) - ) - - lexer = None + lexer = lexer_cache.get(filename, None) # If source code is asm, use our customized lexer. # Note: We can not register our Lexer to pygments and use their APIs, # since the pygment only search the lexers installed via setuptools. - for glob_pat in PwntoolsLexer.filenames: - pat = '^' + glob_pat.replace('.', r'\.').replace('*', r'.*') + '$' - if re.match(pat, filename): - lexer = PwntoolsLexer() - break + if not lexer: + for glob_pat in PwntoolsLexer.filenames: + pat = '^' + glob_pat.replace('.', r'\.').replace('*', r'.*') + '$' + if re.match(pat, filename): + lexer = pwntools_lexer + break if not lexer: try: @@ -63,6 +66,7 @@ def syntax_highlight(code, filename='.asm'): pass if lexer: + lexer_cache[filename] = lexer code = pygments.highlight(code, lexer, formatter).rstrip() return code diff --git a/pwndbg/emu/emulator.py b/pwndbg/emu/emulator.py index 54632cb0a..2415ac4a2 100644 --- a/pwndbg/emu/emulator.py +++ b/pwndbg/emu/emulator.py @@ -10,6 +10,7 @@ from __future__ import unicode_literals import binascii import inspect +import re import capstone as C import gdb @@ -21,6 +22,19 @@ import pwndbg.emu.emulator import pwndbg.memory import pwndbg.regs + +def parse_consts(u_consts): + """ + Unicorn "consts" is a python module consisting of a variable definition + for each known entity. We repack it here as a dict for performance. + """ + consts = {} + for name in dir(u_consts): + if name.startswith('UC_'): + consts[name] = getattr(u_consts, name) + return consts + + # Map our internal architecture names onto Unicorn Engine's architecture types. 
arch_to_UC = { 'i386': U.UC_ARCH_X86, @@ -33,12 +47,12 @@ arch_to_UC = { } arch_to_UC_consts = { - 'i386': U.x86_const, - 'x86-64': U.x86_const, - 'mips': U.mips_const, - 'sparc': U.sparc_const, - 'arm': U.arm_const, - 'aarch64': U.arm64_const, + 'i386': parse_consts(U.x86_const), + 'x86-64': parse_consts(U.x86_const), + 'mips': parse_consts(U.mips_const), + 'sparc': parse_consts(U.sparc_const), + 'arm': parse_consts(U.arm_const), + 'aarch64': parse_consts(U.arm64_const), } # Map our internal architecture names onto Unicorn Engine's architecture types. @@ -98,6 +112,7 @@ e = pwndbg.emu.emulator.Emulator() e.until_jump() ''' + class Emulator(object): def __init__(self): self.arch = pwndbg.arch.current @@ -107,6 +122,14 @@ class Emulator(object): self.consts = arch_to_UC_consts[self.arch] + # Just registers, for faster lookup + self.const_regs = {} + r = re.compile(r'^UC_.*_REG_(.*)$') + for k,v in self.consts.items(): + m = r.match(k) + if m: + self.const_regs[m.group(1)] = v + self.uc_mode = self.get_uc_mode() debug("# Instantiating Unicorn for %s" % self.arch) debug("uc = U.Uc(%r, %r)" % (arch_to_UC[self.arch], self.uc_mode)) @@ -261,8 +284,9 @@ class Emulator(object): # 'eax' ==> enum # if reg in self.regs.all: - for reg_enum in (c for c in dir(self.consts) if c.endswith('_' + reg.upper())): - return getattr(self.consts, reg_enum) + e = self.const_regs.get(reg.upper(), None) + if e is not None: + return e # If we're looking for an abstract register which *is* accounted for, # we can also do an indirect lookup. diff --git a/pwndbg/ida.py b/pwndbg/ida.py index 4e1be3751..a06e8090b 100644 --- a/pwndbg/ida.py +++ b/pwndbg/ida.py @@ -120,6 +120,7 @@ def returns_address(function): return wrapper +@pwndbg.memoize.reset_on_objfile @withIDA def available(): return True