A couple disassembly performance optimizations (#2979)

* Fix unnecessary run of enhancement code
* Avoid another unnecessary instruction disassembly
* Update ARM IT block tests to reflect improved behavior in standalone disassembly
7 months ago · f49a43aaa2
parent 080a7da557
commit f49a43aaa2
6 changed files with 74 additions and 32 deletions
--- a/pwndbg/aglib/disasm/arch.py
+++ b/pwndbg/aglib/disasm/arch.py
@ -1049,3 +1049,23 @@ class DisassemblyAssistant:
            instruction.annotation = memory_or_register_assign(
                target_operand.str, math_string, memory_assignment
            )
+
+
+def basic_enhance(ins: PwndbgInstruction) -> None:
+    # Apply syntax highlighting and inline symbol replacement
+    # Used in cases where we don't want to do the full enhancement process
+    # for performance reasons.
+    if pwndbg.config.syntax_highlight:
+        ins.asm_string = syntax_highlight(ins.asm_string)
+
+    if pwndbg.config.disasm_inline_symbols:
+        # Make inline replacements, so `jmp 0x400122` becomes `jmp function_name`
+        for op in ins.operands:
+            if op.type is CS_OP_IMM:
+                op.before_value = op.imm
+
+                if op.before_value >= 0:
+                    op.symbol = MemoryColor.attempt_colorized_symbol(op.before_value)
+
+                if op.symbol:
+                    ins.asm_string = ins.asm_string.replace(hex(op.before_value), op.symbol)
--- a/pwndbg/aglib/disasm/disassembly.py
+++ b/pwndbg/aglib/disasm/disassembly.py
@ -19,6 +19,7 @@ from capstone import *  # noqa: F403
 import pwndbg
 import pwndbg.aglib.arch
 import pwndbg.aglib.disasm.aarch64
+import pwndbg.aglib.disasm.arch
 import pwndbg.aglib.disasm.arm
 import pwndbg.aglib.disasm.disassembly
 import pwndbg.aglib.disasm.loongarch64
@ -98,11 +99,8 @@ emulated_arm_mode_cache: DefaultDict[int, int | None] = collections.defaultdict(


@pwndbg.lib.cache.cache_until("objfile")
-def get_disassembler(address: int, cs_info: Tuple[int, int] = None):
-    if cs_info is not None:
-        arch, mode = cs_info
-    else:
-        arch, mode = pwndbg.aglib.arch.get_capstone_constants(address)
+def get_disassembler(cs_info: Tuple[int, int]):
+    arch, mode = cs_info

    mode |= CapstoneEndian[pwndbg.aglib.arch.endian]

@ -136,9 +134,12 @@ def get_one_instruction(

    cs_info = pwndbg.aglib.arch.get_capstone_constants(address)
    if cs_info is None:
-        return ManualPwndbgInstruction(address)
+        instr = ManualPwndbgInstruction(address)
+        if enhance:
+            pwndbg.aglib.disasm.arch.basic_enhance(instr)
+        return instr

-    md = get_disassembler(address, cs_info)
+    md = get_disassembler(cs_info)
    data = pwndbg.aglib.memory.read(address, pwndbg.aglib.arch.max_instruction_size, partial=True)
    for ins in md.disasm(bytes(data), address, 1):
        pwn_ins: PwndbgInstruction = PwndbgInstructionImpl(ins)
--- a/pwndbg/commands/rop.py
+++ b/pwndbg/commands/rop.py
@ -24,7 +24,7 @@ class RawMemoryBinary(object):
        self.start_addr = start_addr
        self.__fileName = options.binary
        self.__rawBinary = None
-        self.cs = get_disassembler(pwndbg.aglib.regs.pc)
+        self.cs = get_disassembler(pwndbg.aglib.arch.get_capstone_constants(pwndbg.aglib.regs.pc))

        with open(self.__fileName, "rb") as fp:
            self.__rawBinary = fp.read()
--- a/pwndbg/emu/emulator.py
+++ b/pwndbg/emu/emulator.py
@ -827,7 +827,7 @@ class Emulator:
        )
        self.until_syscall_address = address

-    def single_step(self, pc=None) -> Tuple[int, int]:
+    def single_step(self, pc=None, check_instruction=False) -> Tuple[int, int]:
        """Steps one instruction.

        Yields:
@ -844,22 +844,23 @@ class Emulator:

        pc = pc or self.pc

-        insn = pwndbg.aglib.disasm.disassembly.one_raw(pc)
+        if check_instruction or DEBUG & DEBUG_EXECUTING:
+            insn = pwndbg.aglib.disasm.disassembly.one_raw(pc)

-        # If we don't know how to disassemble, bail.
-        if insn is None:
-            debug(DEBUG_EXECUTING, "Can't disassemble instruction at %#x", pc)
-            return self.last_single_step_result
+            # If we don't know how to disassemble, bail.
+            if insn is None:
+                debug(DEBUG_EXECUTING, "Can't disassemble instruction at %#x", pc)
+                return self.last_single_step_result

-        if insn.id in BANNED_INSTRUCTIONS.get(self.arch, {}):
-            debug(DEBUG_EXECUTING, "Hit illegal instruction at %#x", pc)
-            return self.last_single_step_result
+            if insn.id in BANNED_INSTRUCTIONS.get(self.arch, {}):
+                debug(DEBUG_EXECUTING, "Hit illegal instruction at %#x", pc)
+                return self.last_single_step_result

-        debug(
-            DEBUG_EXECUTING,
-            "# Emulator attempting to single-step at %#x: %s %s",
-            (pc, insn.mnemonic, insn.op_str),
-        )
+            debug(
+                DEBUG_EXECUTING,
+                "# Instruction: attempting to single-step at %#x: %s %s",
+                (pc, insn.mnemonic, insn.op_str),
+            )

        try:
            self.single_step_hook_hit_count = 0
--- a/pwndbg/enhance.py
+++ b/pwndbg/enhance.py
@ -112,8 +112,9 @@ def enhance(
        rwx = exe = False

    if exe:
-        pwndbg_instr = pwndbg.aglib.disasm.disassembly.one(value)
+        pwndbg_instr = pwndbg.aglib.disasm.disassembly.one_raw(value)
        if pwndbg_instr:
+            pwndbg.aglib.disasm.arch.basic_enhance(pwndbg_instr)
            # For telescoping, we don't want the extra spaces between the mnemonic and operands
            # which are baked in during enhancement. This removes those spaces.
            instr = " ".join(pwndbg_instr.asm_string.split())
--- a/tests/qemu-tests/tests/user/test_arm.py
+++ b/tests/qemu-tests/tests/user/test_arm.py
@ -745,10 +745,10 @@ def test_arm_it_block(qemu_assembly_run):
        "─────────────────[ DISASM / arm / thumb mode / set emulate on ]─────────────────\n"
        " ► 0x200bc <_start+8>     cmp    r0, #0     0x200bd - 0x0     CPSR => 0x20000030 [ n z C v q j T e a i f ]\n"
        "   0x200be <_start+10>    ittte  eq\n"
-        "   0x200c0 <_start+12>    movs   r1, #1     R1 => 1\n"
-        "   0x200c2 <_start+14>    movs   r2, #2     R2 => 2\n"
-        "   0x200c4 <_start+16>    movs   r2, #3     R2 => 3\n"
-        "   0x200c6 <_start+18>    movs   r1, #4     R1 => 4\n"
+        "   0x200c0 <_start+12>    moveq  r1, #1     R1 => 1\n"
+        "   0x200c2 <_start+14>    moveq  r2, #2     R2 => 2\n"
+        "   0x200c4 <_start+16>    moveq  r2, #3     R2 => 3\n"
+        "   0x200c6 <_start+18>    movne  r1, #4     R1 => 4\n"
        "   0x200c8 <_start+20>    nop    \n"
        "   0x200ca <_start+22>    nop    \n"
        "   0x200cc <_start+24>    nop    \n"
@ -759,6 +759,25 @@ def test_arm_it_block(qemu_assembly_run):

    assert dis_1 == expected_1

+
+def test_arm_it_block_step_into(qemu_assembly_run):
+    """
+    Tests 2 things:
+    - Stepping into the IT block doesn't break our usage of Unicorn (some of the instructions would appear to be jumps if so)
+    - Instructions have IT mode suffixes
+
+    TODO:
+    - Fix suffixes not appearing here due to https://github.com/capstone-engine/capstone/issues/2702
+    - See comments in https://github.com/pwndbg/pwndbg/pull/2979
+    """
+    qemu_assembly_run(ARM_IT_BLOCK, "arm")
+
+    gdb.execute("si")
+    gdb.execute("si")
+
+    # Prime the instruction cache
+    gdb.execute("context disasm", to_string=True)
+
    # Now, ensure that once we step into the block, the disassembly is still correct.
    gdb.execute("si")
    gdb.execute("si")
@ -772,7 +791,7 @@ def test_arm_it_block(qemu_assembly_run):
        "─────────────────[ DISASM / arm / thumb mode / set emulate on ]─────────────────\n"
        "   0x200bc <_start+8>     cmp    r0, #0     0x200bd - 0x0     CPSR => 0x20000030 [ n z C v q j T e a i f ]\n"
        "   0x200be <_start+10>    ittte  eq\n"
-        "   0x200c0 <_start+12>    movs   r1, #1     R1 => 1\n"
+        "   0x200c0 <_start+12>    moveq  r1, #1     R1 => 1\n"
        " ► 0x200c2 <_start+14>    movs   r2, #2     R2 => 2\n"
        "   0x200c4 <_start+16>    movs   r2, #3     R2 => 3\n"
        "   0x200c6 <_start+18>    movs   r1, #4     R1 => 4\n"
@ -807,10 +826,10 @@ def test_arm_it_block_cached_thumb_mode(qemu_assembly_run):
        "    ↓\n"
        "   0x200bc <_start+8>     cmp    r0, #0         0x200bd - 0x0     CPSR => 0x20000030 [ n z C v q j T e a i f ]\n"
        "   0x200be <_start+10>    ittte  eq\n"
-        "   0x200c0 <_start+12>    movs   r1, #1         R1 => 1\n"
-        "   0x200c2 <_start+14>    movs   r2, #2         R2 => 2\n"
-        "   0x200c4 <_start+16>    movs   r2, #3         R2 => 3\n"
-        "   0x200c6 <_start+18>    movs   r1, #4         R1 => 4\n"
+        "   0x200c0 <_start+12>    moveq  r1, #1         R1 => 1\n"
+        "   0x200c2 <_start+14>    moveq  r2, #2         R2 => 2\n"
+        "   0x200c4 <_start+16>    moveq  r2, #3         R2 => 3\n"
+        "   0x200c6 <_start+18>    movne  r1, #4         R1 => 4\n"
        "   0x200c8 <_start+20>    nop    \n"
        "   0x200ca <_start+22>    nop    \n"
        "   0x200cc <_start+24>    nop    \n"