From 79706d73159bf4eda5d64df5b9fe321073f4dd03 Mon Sep 17 00:00:00 2001 From: OBarronCS <55004530+OBarronCS@users.noreply.github.com> Date: Sat, 26 Apr 2025 11:06:55 -0700 Subject: [PATCH] Fix ARM IT blocks in disassembly (#2922) * Fix ARM Thumb mode IT blocks in disassembly * Thumb mode fix * lint * Simplify * Update pwndbg/aglib/disasm/arm.py Co-authored-by: Disconnect3d * Update pwndbg/aglib/disasm/disassembly.py Co-authored-by: Disconnect3d * typo * simplify if --------- Co-authored-by: Disconnect3d --- pwndbg/aglib/disasm/arm.py | 31 +++++++ pwndbg/aglib/disasm/disassembly.py | 15 ++- pwndbg/aglib/disasm/instruction.py | 4 + tests/qemu-tests/tests/user/test_arm.py | 116 ++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 3 deletions(-) diff --git a/pwndbg/aglib/disasm/arm.py b/pwndbg/aglib/disasm/arm.py index e8c06402f..543834d24 100644 --- a/pwndbg/aglib/disasm/arm.py +++ b/pwndbg/aglib/disasm/arm.py @@ -124,6 +124,30 @@ ARM_CAN_WRITE_TO_PC_INSTRUCTIONS = { } +def itstate_from_cpsr(cpsr_value: int) -> int: + """ + ITSTATE == If-Then execution state bits for the Thumb IT instruction + The ITSTATE bits are spread across 3 sections of the Arm flags register, for a total of 8 bits. + This function extracts them and reorders the bits into their logical order. + - https://developer.arm.com/documentation/ddi0403/d/System-Level-Architecture/System-Level-Programmers--Model/Registers/The-special-purpose-program-status-registers--xPSR#:~:text=shows%20the%20assignment%20of%20the%20ICI/IT%20bits. + + Bits of the flags register: EPSR[26:25] EPSR[15:12] EPSR[11:10] + Bits of ITSTATE: IT[1:0] IT[7:4] IT[3:2] + + The lower 5 bits hold information that indicates the number of instructions in the IT Block. + The top 3 bits indicate the base condition of the block. 
+ - https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Application-Level-Programmers--Model/Execution-state-registers/IT-block-state-register--ITSTATE?lang=en + + If the value is zero, it means we are not in an IT block. + """ + + return ( + ((cpsr_value >> 25) & 0b11) + | ((cpsr_value >> 10) & 0b11) << 2 + | ((cpsr_value >> 12) & 0b1111) << 4 + ) + + # This class enhances both ARM A-profile and ARM M-profile (Cortex-M) class ArmDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): def __init__(self, architecture, flags_reg: Literal["cpsr", "xpsr"]) -> None: @@ -218,6 +242,13 @@ class ArmDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant): # https://github.com/capstone-engine/capstone/issues/2630 instruction.groups.remove(CS_GRP_CALL) + # Disable Unicorn while in IT instruction blocks since Unicorn cannot be paused in it. + flags_value = pwndbg.aglib.regs[self.flags_reg] + it_state = itstate_from_cpsr(flags_value) + + if (instruction.id == ARM_INS_IT or it_state != 0) and emu: + emu.valid = False + @override def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition: if ARM_GRP_JUMP in instruction.groups: diff --git a/pwndbg/aglib/disasm/disassembly.py b/pwndbg/aglib/disasm/disassembly.py index c13353a1a..801787d73 100644 --- a/pwndbg/aglib/disasm/disassembly.py +++ b/pwndbg/aglib/disasm/disassembly.py @@ -93,8 +93,8 @@ computed_instruction_cache: DefaultDict[int, PwndbgInstruction] = collections.de ) # Maps an address to integer 0/1, indicating the Thumb mode bit for the given address. -# Value is None if Thumb bit irrelevent or unknown. -emulated_arm_mode_cache: DefaultDict[int, int] = collections.defaultdict(lambda: None) +# Value is None if Thumb bit is irrelevant or unknown. 
+emulated_arm_mode_cache: DefaultDict[int, int | None] = collections.defaultdict(lambda: None) @pwndbg.lib.cache.cache_until("objfile") @@ -381,6 +381,8 @@ def near( insn = current total_instructions = 1 + (2 * instructions) + last_emulated_thumb_bit_value: int | None = None + while insn and len(insns) < total_instructions: target = insn.next if not linear else insn.address + insn.size @@ -392,7 +394,14 @@ def near( # Upon execution the previous instruction, the Thumb mode bit may have changed. # This means we know whether the next instruction executed will be Thumb or not. # This returns None in the case the Thumb bit is not relevent. - emulated_arm_mode_cache[emu.pc] = emu.read_thumb_bit() + last_emulated_thumb_bit_value = emulated_arm_mode_cache[emu.pc] = ( + emu.read_thumb_bit() + ) + + if not emu and last_emulated_thumb_bit_value is not None: + # The emulator may have been disabled, but while it was live we transitioned into Thumb mode. + # We propagate the Thumb mode through the remaining instructions we disassemble. 
+ emulated_arm_mode_cache[target] = last_emulated_thumb_bit_value # Handle visual splits in the disasm view # We create splits in 3 conditions: diff --git a/pwndbg/aglib/disasm/instruction.py b/pwndbg/aglib/disasm/instruction.py index bee00870a..897696993 100644 --- a/pwndbg/aglib/disasm/instruction.py +++ b/pwndbg/aglib/disasm/instruction.py @@ -8,6 +8,8 @@ from typing import List from typing import Protocol from typing import Set +import pwnlib + # Reverse lookup tables for debug printing from capstone import CS_AC from capstone import CS_GRP @@ -507,6 +509,7 @@ class PwndbgInstructionImpl(PwndbgInstruction): operands_str = " ".join([repr(op) for op in self.operands]) info = f"""{self.mnemonic} {self.op_str} at {self.address:#x} (size={self.size}) (arch: {CAPSTONE_ARCH_MAPPING_STRING.get(self.cs_insn._cs.arch,None)}) + Bytes: {pwnlib.util.fiddling.enhex(self.bytes)} ID: {self.id}, {self.cs_insn.insn_name()} Capstone ID/Alias ID: {self.cs_insn.id} / {self.cs_insn.alias_id if self.cs_insn.is_alias else 'None'} Raw asm: {'%-06s %s' % (self.mnemonic, self.op_str)} @@ -531,6 +534,7 @@ class PwndbgInstructionImpl(PwndbgInstruction): # Hacky, but this is just for debugging if hasattr(self.cs_insn, "cc"): info += f"\n\tARM condition code: {self.cs_insn.cc}" + info += f"\n\tThumb mode: {1 if self.cs_insn._cs._mode & CS_MODE_THUMB else 0}" return info diff --git a/tests/qemu-tests/tests/user/test_arm.py b/tests/qemu-tests/tests/user/test_arm.py index f494beeeb..a13a0b252 100644 --- a/tests/qemu-tests/tests/user/test_arm.py +++ b/tests/qemu-tests/tests/user/test_arm.py @@ -682,3 +682,119 @@ def test_arm_negative_index_register(qemu_assembly_run): ) assert dis == expected + + +ARM_IT_BLOCK = """ +add r0, pc, #1 +bx r0 + +.THUMB +CMP R0, #0 +ITTTE EQ +MOVEQ R1, #1 +MOVEQ R2, #2 +MOVEQ R2, #3 +MOVNE R1, #4 +nop +nop +nop +nop +nop +nop +""" + + +def test_arm_it_block(qemu_assembly_run): + """ + The Unicorn engine cannot be paused in the IT block, so we need to handle these 
 instructions specially. + + Additionally, if we are halfway through an IT block, and then copy the process state into the emulator, it will finish the + IT block upon single stepping, and sometimes step an additional step. + """ + + qemu_assembly_run(ARM_IT_BLOCK, "arm") + + gdb.execute("si") + gdb.execute("si") + + dis_1 = gdb.execute("context disasm", to_string=True) + dis_1 = pwndbg.color.strip(dis_1) + + expected_1 = ( + "LEGEND: STACK | HEAP | CODE | DATA | WX | RODATA\n" + "─────────────────[ DISASM / arm / thumb mode / set emulate on ]─────────────────\n" + " ► 0x10000008 <_start+8> cmp r0, #0 0x10000009 - 0x0 CPSR => 0x20000030 [ n z C v q j T e a i f ]\n" + " 0x1000000a <_start+10> ittte eq\n" + " 0x1000000c <_start+12> movs r1, #1 R1 => 1\n" + " 0x1000000e <_start+14> movs r2, #2 R2 => 2\n" + " 0x10000010 <_start+16> movs r2, #3 R2 => 3\n" + " 0x10000012 <_start+18> movs r1, #4 R1 => 4\n" + " 0x10000014 <_start+20> nop \n" + " 0x10000016 <_start+22> nop \n" + " 0x10000018 <_start+24> nop \n" + " 0x1000001a <_start+26> nop \n" + " 0x1000001c <_start+28> nop \n" + "────────────────────────────────────────────────────────────────────────────────\n" + ) + + assert dis_1 == expected_1 + + # Now, ensure that once we step into the block, the disassembly is still correct. 
+ gdb.execute("si") + gdb.execute("si") + gdb.execute("si") + + dis_2 = gdb.execute("context disasm", to_string=True) + dis_2 = pwndbg.color.strip(dis_2) + + expected_2 = ( + "LEGEND: STACK | HEAP | CODE | DATA | WX | RODATA\n" + "─────────────────[ DISASM / arm / thumb mode / set emulate on ]─────────────────\n" + " 0x10000008 <_start+8> cmp r0, #0 0x10000009 - 0x0 CPSR => 0x20000030 [ n z C v q j T e a i f ]\n" + " 0x1000000a <_start+10> ittte eq\n" + " 0x1000000c <_start+12> movs r1, #1 R1 => 1\n" + " ► 0x1000000e <_start+14> movs r2, #2 R2 => 2\n" + " 0x10000010 <_start+16> movs r2, #3 R2 => 3\n" + " 0x10000012 <_start+18> movs r1, #4 R1 => 4\n" + " 0x10000014 <_start+20> nop \n" + " 0x10000016 <_start+22> nop \n" + " 0x10000018 <_start+24> nop \n" + " 0x1000001a <_start+26> nop \n" + " 0x1000001c <_start+28> nop \n" + "────────────────────────────────────────────────────────────────────────────────\n" + ) + + assert dis_2 == expected_2 + + +def test_arm_it_block_cached_thumb_mode(qemu_assembly_run): + """ + This test ensures that we handle transitions to Thumb mode correctly once the emulator has been disabled. 
+ """ + + qemu_assembly_run(ARM_IT_BLOCK, "arm") + + gdb.execute("context disasm", to_string=True) + + dis = gdb.execute("context disasm", to_string=True) + dis = pwndbg.color.strip(dis) + + expected = ( + "LEGEND: STACK | HEAP | CODE | DATA | WX | RODATA\n" + "──────────────────[ DISASM / arm / arm mode / set emulate on ]──────────────────\n" + " ► 0x10000000 <_start> add r0, pc, #1 R0 => 0x10000009 (_start+9) (0x10000008 + 0x1)\n" + " 0x10000004 <_start+4> bx r0 <_start+8>\n" + " ↓\n" + " 0x10000008 <_start+8> cmp r0, #0 0x10000009 - 0x0 CPSR => 0x20000030 [ n z C v q j T e a i f ]\n" + " 0x1000000a <_start+10> ittte eq\n" + " 0x1000000c <_start+12> movs r1, #1 R1 => 1\n" + " 0x1000000e <_start+14> movs r2, #2 R2 => 2\n" + " 0x10000010 <_start+16> movs r2, #3 R2 => 3\n" + " 0x10000012 <_start+18> movs r1, #4 R1 => 4\n" + " 0x10000014 <_start+20> nop \n" + " 0x10000016 <_start+22> nop \n" + " 0x10000018 <_start+24> nop \n" + "────────────────────────────────────────────────────────────────────────────────\n" + ) + + assert dis == expected