Fix ARM IT blocks in disassembly (#2922)

* Fix ARM Thumb mode IT blocks in disassembly * Thumb mode fix * lint * Simplify * Update pwndbg/aglib/disasm/arm.py Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com> * Update pwndbg/aglib/disasm/disassembly.py Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com> * typo * simplify if --------- Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com>
8 months ago · 79706d7315
parent c3bc1dba82
commit 79706d7315
4 changed files with 163 additions and 3 deletions
--- a/pwndbg/aglib/disasm/arm.py
+++ b/pwndbg/aglib/disasm/arm.py
@ -124,6 +124,30 @@ ARM_CAN_WRITE_TO_PC_INSTRUCTIONS = {
 }


+def itstate_from_cpsr(cpsr_value: int) -> int:
+    """
+    ITSTATE == If-Then execution state bits for the Thumb IT instruction.
+    The ITSTATE bits are spread across 3 sections of the Arm flags register, for a total of 8 bits.
+    This function extracts them and reorders the bits into their logical order.
+    - https://developer.arm.com/documentation/ddi0403/d/System-Level-Architecture/System-Level-Programmers--Model/Registers/The-special-purpose-program-status-registers--xPSR#:~:text=shows%20the%20assignment%20of%20the%20ICI/IT%20bits.
+
+    Bits of the flags register: EPSR[26:25]    EPSR[15:12]    EPSR[11:10]
+    Bits of ITSTATE:            IT[1:0]        IT[7:4]        IT[3:2]
+
+    The lower 5 bits hold information that indicates the number of instructions in the IT block.
+    The top 3 bits indicate the base condition of the block.
+    - https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Application-Level-Programmers--Model/Execution-state-registers/IT-block-state-register--ITSTATE?lang=en
+
+    If the value is zero, it means we are not in an IT block.
+    """
+
+    return (
+        ((cpsr_value >> 25) & 0b11)
+        | ((cpsr_value >> 10) & 0b11) << 2
+        | ((cpsr_value >> 12) & 0b1111) << 4
+    )
+
+
 # This class enhances both ARM A-profile and ARM M-profile (Cortex-M)
 class ArmDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant):
    def __init__(self, architecture, flags_reg: Literal["cpsr", "xpsr"]) -> None:
@ -218,6 +242,13 @@ class ArmDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant):
            # https://github.com/capstone-engine/capstone/issues/2630
            instruction.groups.remove(CS_GRP_CALL)

+        # Disable Unicorn while inside an IT block, since Unicorn cannot be paused in the middle of one.
+        flags_value = pwndbg.aglib.regs[self.flags_reg]
+        it_state = itstate_from_cpsr(flags_value)
+
+        if (instruction.id == ARM_INS_IT or it_state != 0) and emu:
+            emu.valid = False
+
    @override
    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
        if ARM_GRP_JUMP in instruction.groups:
--- a/pwndbg/aglib/disasm/disassembly.py
+++ b/pwndbg/aglib/disasm/disassembly.py
@ -93,8 +93,8 @@ computed_instruction_cache: DefaultDict[int, PwndbgInstruction] = collections.de
 )

 # Maps an address to integer 0/1, indicating the Thumb mode bit for the given address.
-# Value is None if Thumb bit irrelevent or unknown.
-emulated_arm_mode_cache: DefaultDict[int, int] = collections.defaultdict(lambda: None)
+# Value is None if the Thumb bit is irrelevant or unknown.
+emulated_arm_mode_cache: DefaultDict[int, int | None] = collections.defaultdict(lambda: None)


@pwndbg.lib.cache.cache_until("objfile")
@ -381,6 +381,8 @@ def near(
    insn = current
    total_instructions = 1 + (2 * instructions)

+    last_emulated_thumb_bit_value: int | None = None
+
    while insn and len(insns) < total_instructions:
        target = insn.next if not linear else insn.address + insn.size

@ -392,7 +394,14 @@ def near(
                # Upon execution the previous instruction, the Thumb mode bit may have changed.
                # This means we know whether the next instruction executed will be Thumb or not.
                # This returns None in the case the Thumb bit is not relevent.
-                emulated_arm_mode_cache[emu.pc] = emu.read_thumb_bit()
+                last_emulated_thumb_bit_value = emulated_arm_mode_cache[emu.pc] = (
+                    emu.read_thumb_bit()
+                )
+
+        if not emu and last_emulated_thumb_bit_value is not None:
+            # The emulator may have been disabled, but while it was live it reported the Thumb mode bit
+            # (e.g. we transitioned into Thumb mode). We propagate that last known value through the
+            # remaining instructions we disassemble.
+            emulated_arm_mode_cache[target] = last_emulated_thumb_bit_value

        # Handle visual splits in the disasm view
        # We create splits in 3 conditions:
--- a/pwndbg/aglib/disasm/instruction.py
+++ b/pwndbg/aglib/disasm/instruction.py
@ -8,6 +8,8 @@ from typing import List
 from typing import Protocol
 from typing import Set

+import pwnlib
+
 # Reverse lookup tables for debug printing
 from capstone import CS_AC
 from capstone import CS_GRP
@ -507,6 +509,7 @@ class PwndbgInstructionImpl(PwndbgInstruction):
        operands_str = " ".join([repr(op) for op in self.operands])

        info = f"""{self.mnemonic} {self.op_str} at {self.address:#x} (size={self.size}) (arch: {CAPSTONE_ARCH_MAPPING_STRING.get(self.cs_insn._cs.arch,None)})
+        Bytes: {pwnlib.util.fiddling.enhex(self.bytes)}
        ID: {self.id}, {self.cs_insn.insn_name()}
        Capstone ID/Alias ID: {self.cs_insn.id} / {self.cs_insn.alias_id if self.cs_insn.is_alias else 'None'}
        Raw asm: {'%-06s %s' % (self.mnemonic, self.op_str)}
@ -531,6 +534,7 @@ class PwndbgInstructionImpl(PwndbgInstruction):
        # Hacky, but this is just for debugging
        if hasattr(self.cs_insn, "cc"):
            info += f"\n\tARM condition code: {self.cs_insn.cc}"
+            info += f"\n\tThumb mode: {1 if self.cs_insn._cs._mode & CS_MODE_THUMB else 0}"

        return info

--- a/tests/qemu-tests/tests/user/test_arm.py
+++ b/tests/qemu-tests/tests/user/test_arm.py
@ -682,3 +682,119 @@ def test_arm_negative_index_register(qemu_assembly_run):
    )

    assert dis == expected
+
+
+ARM_IT_BLOCK = """
+add r0, pc, #1
+bx r0
+
+.THUMB
+CMP     R0, #0
+ITTTE   EQ
+MOVEQ   R1, #1
+MOVEQ   R2, #2
+MOVEQ   R2, #3
+MOVNE   R1, #4
+nop
+nop
+nop
+nop
+nop
+nop
+"""
+
+
+def test_arm_it_block(qemu_assembly_run):
+    """
+    The Unicorn engine cannot be paused in the IT block, so we need to handle these instructions specially.
+
+    Additionally, if we are halfway through an IT block, and then copy the process state into the emulator, it will finish the
+    IT block upon single stepping, and sometimes step an additional step.
+    """
+
+    qemu_assembly_run(ARM_IT_BLOCK, "arm")
+
+    gdb.execute("si")
+    gdb.execute("si")
+
+    dis_1 = gdb.execute("context disasm", to_string=True)
+    dis_1 = pwndbg.color.strip(dis_1)
+
+    expected_1 = (
+        "LEGEND: STACK | HEAP | CODE | DATA | WX | RODATA\n"
+        "─────────────────[ DISASM / arm / thumb mode / set emulate on ]─────────────────\n"
+        " ► 0x10000008 <_start+8>     cmp    r0, #0     0x10000009 - 0x0     CPSR => 0x20000030 [ n z C v q j T e a i f ]\n"
+        "   0x1000000a <_start+10>    ittte  eq\n"
+        "   0x1000000c <_start+12>    movs   r1, #1     R1 => 1\n"
+        "   0x1000000e <_start+14>    movs   r2, #2     R2 => 2\n"
+        "   0x10000010 <_start+16>    movs   r2, #3     R2 => 3\n"
+        "   0x10000012 <_start+18>    movs   r1, #4     R1 => 4\n"
+        "   0x10000014 <_start+20>    nop    \n"
+        "   0x10000016 <_start+22>    nop    \n"
+        "   0x10000018 <_start+24>    nop    \n"
+        "   0x1000001a <_start+26>    nop    \n"
+        "   0x1000001c <_start+28>    nop    \n"
+        "────────────────────────────────────────────────────────────────────────────────\n"
+    )
+
+    assert dis_1 == expected_1
+
+    # Now, ensure that once we step into the block, the disassembly is still correct.
+    gdb.execute("si")
+    gdb.execute("si")
+    gdb.execute("si")
+
+    dis_2 = gdb.execute("context disasm", to_string=True)
+    dis_2 = pwndbg.color.strip(dis_2)
+
+    expected_2 = (
+        "LEGEND: STACK | HEAP | CODE | DATA | WX | RODATA\n"
+        "─────────────────[ DISASM / arm / thumb mode / set emulate on ]─────────────────\n"
+        "   0x10000008 <_start+8>     cmp    r0, #0     0x10000009 - 0x0     CPSR => 0x20000030 [ n z C v q j T e a i f ]\n"
+        "   0x1000000a <_start+10>    ittte  eq\n"
+        "   0x1000000c <_start+12>    movs   r1, #1     R1 => 1\n"
+        " ► 0x1000000e <_start+14>    movs   r2, #2     R2 => 2\n"
+        "   0x10000010 <_start+16>    movs   r2, #3     R2 => 3\n"
+        "   0x10000012 <_start+18>    movs   r1, #4     R1 => 4\n"
+        "   0x10000014 <_start+20>    nop    \n"
+        "   0x10000016 <_start+22>    nop    \n"
+        "   0x10000018 <_start+24>    nop    \n"
+        "   0x1000001a <_start+26>    nop    \n"
+        "   0x1000001c <_start+28>    nop    \n"
+        "────────────────────────────────────────────────────────────────────────────────\n"
+    )
+
+    assert dis_2 == expected_2
+
+
+def test_arm_it_block_cached_thumb_mode(qemu_assembly_run):
+    """
+    This test ensures that we handle transitions to Thumb mode correctly once the emulator has been disabled.
+    """
+
+    qemu_assembly_run(ARM_IT_BLOCK, "arm")
+
+    gdb.execute("context disasm", to_string=True)
+
+    dis = gdb.execute("context disasm", to_string=True)
+    dis = pwndbg.color.strip(dis)
+
+    expected = (
+        "LEGEND: STACK | HEAP | CODE | DATA | WX | RODATA\n"
+        "──────────────────[ DISASM / arm / arm mode / set emulate on ]──────────────────\n"
+        " ► 0x10000000 <_start>       add    r0, pc, #1     R0 => 0x10000009 (_start+9) (0x10000008 + 0x1)\n"
+        "   0x10000004 <_start+4>     bx     r0                          <_start+8>\n"
+        "    ↓\n"
+        "   0x10000008 <_start+8>     cmp    r0, #0         0x10000009 - 0x0     CPSR => 0x20000030 [ n z C v q j T e a i f ]\n"
+        "   0x1000000a <_start+10>    ittte  eq\n"
+        "   0x1000000c <_start+12>    movs   r1, #1         R1 => 1\n"
+        "   0x1000000e <_start+14>    movs   r2, #2         R2 => 2\n"
+        "   0x10000010 <_start+16>    movs   r2, #3         R2 => 3\n"
+        "   0x10000012 <_start+18>    movs   r1, #4         R1 => 4\n"
+        "   0x10000014 <_start+20>    nop    \n"
+        "   0x10000016 <_start+22>    nop    \n"
+        "   0x10000018 <_start+24>    nop    \n"
+        "────────────────────────────────────────────────────────────────────────────────\n"
+    )
+
+    assert dis == expected