Minor Annotations Improvements (#2364)

* emulator banned instructions and interrupt debug statement
* debogusify target resolving call/not-call logic
* Fix all RISC-V branches
* Fix arm branches always having checkmarks and being unrolled in wrong conditions
* Better splits, AArch64 correct conditional branches
* lint
* remove local variable
* remove unused method
1 year ago · c86dc797ee
parent 9e7c41e4b5
commit c86dc797ee
9 changed files with 105 additions and 82 deletions
--- a/pwndbg/emu/emulator.py
+++ b/pwndbg/emu/emulator.py
@ -171,6 +171,13 @@ arch_to_SYSCALL = {
    U.UC_ARCH_RISCV: [C.riscv_const.RISCV_INS_ECALL],
 }

+# We stop emulation when hitting these instructions, since they depend on co-processors or other information
+# unavailable to the emulator
+BANNED_INSTRUCTIONS = {
+    "mips": {C.mips.MIPS_INS_RDHWR},
+    "arm": {C.arm.ARM_INS_MRC, C.arm.ARM_INS_MRRC, C.arm.ARM_INS_MRC2, C.arm.ARM_INS_MRRC2},
+}
+
 # https://github.com/unicorn-engine/unicorn/issues/550
 blacklisted_regs = ["ip", "cs", "ds", "es", "fs", "gs", "ss"]

@ -626,7 +633,8 @@ class Emulator:
        """
        We never want to emulate through an interrupt.  Just stop.
        """
-        debug(DEBUG_INTERRUPT, "Got an interrupt")
+        debug(DEBUG_INTERRUPT, "Got an interrupt - %d", intno)
+        self.valid = False
        self.uc.emu_stop()

    def get_reg_enum(self, reg: str) -> int | None:
@ -696,8 +704,7 @@ class Emulator:
        # and set the least significant bit of the PC to 1 if the bit is 1 in order to enable Thumb mode
        # for the execution of the next instruction. If this `emulate_with_hook` executes multiple instructions
        # which have Thumb mode transitions, Unicorn will internally handle them.
-        thumb_bit = self.read_thumb_bit()
-        pc |= thumb_bit
+        pc |= self.read_thumb_bit()

        try:
            self.emu_start(pc, 0, count=count)
@ -790,14 +797,11 @@ class Emulator:
        )
        self.until_syscall_address = address

-    def single_step(self, pc=None, check_instruction_valid=True) -> Tuple[int, int]:
+    def single_step(self, pc=None) -> Tuple[int, int]:
        """Steps one instruction.

        Yields:
-            Each iteration, yields a tuple of (address_just_executed, instruction_size).=
-
-            A StopIteration is raised if a fault or syscall or call instruction
-            is encountered.
+            Each iteration, yields a tuple of (address_just_executed, instruction_size).

            Returns (None, None) upon failure to execute the instruction
        """
@ -810,25 +814,28 @@ class Emulator:

        pc = pc or self.pc

-        if check_instruction_valid:
-            insn = pwndbg.gdblib.disasm.one_raw(pc)
+        insn = pwndbg.gdblib.disasm.one_raw(pc)

-            # If we don't know how to disassemble, bail.
-            if insn is None:
-                debug(DEBUG_EXECUTING, "Can't disassemble instruction at %#x", pc)
-                return self.last_single_step_result
+        # If we don't know how to disassemble, bail.
+        if insn is None:
+            debug(DEBUG_EXECUTING, "Can't disassemble instruction at %#x", pc)
+            return self.last_single_step_result

-            debug(
-                DEBUG_EXECUTING,
-                "# Emulator attempting to single-step at %#x: %s %s",
-                (pc, insn.mnemonic, insn.op_str),
-            )
-        else:
-            debug(DEBUG_EXECUTING, "# Emulator attempting to single-step at %#x", (pc,))
+        if insn.id in BANNED_INSTRUCTIONS.get(self.arch, {}):
+            debug(DEBUG_EXECUTING, "Hit illegal instruction at %#x", pc)
+            return self.last_single_step_result
+
+        debug(
+            DEBUG_EXECUTING,
+            "# Emulator attempting to single-step at %#x: %s %s",
+            (pc, insn.mnemonic, insn.op_str),
+        )

        try:
            self.single_step_hook_hit_count = 0
            self.emulate_with_hook(self.single_step_hook_code, count=1)
+            if not self.valid:
+                return InstructionExecutedResult(None, None)

            # If above call does not throw an Exception, we successfully executed the instruction
            self.last_pc = pc
--- a/pwndbg/gdblib/disasm/init.py
+++ b/pwndbg/gdblib/disasm/init.py
@ -386,6 +386,8 @@ def near(
        while insn is not None and len(insns) < instructions:
            if DEBUG_ENHANCEMENT:
                print(f"Got instruction from cache, addr={cached:#x}")
+            if insn.jump_like and insn.split == SplitType.NO_SPLIT:
+                insn.split = SplitType.BRANCH_NOT_TAKEN
            insns.append(insn)
            cached = backward_cache[insn.address]
            insn = one(cached, from_cache=use_cache, put_backward_cache=False) if cached else None
@ -423,7 +425,7 @@ def near(

        # Handle visual splits in the disasm view
        # The second check here handles instructions like x86 `REP` that repeat the instruction
-        if insn.jump_like or insn.next == insn.address:
+        if insn.has_jump_target or insn.next == insn.address:
            split_insn = insn

            # If this instruction has a delay slot, disassemble the delay slot instruction
--- a/pwndbg/gdblib/disasm/aarch64.py
+++ b/pwndbg/gdblib/disasm/aarch64.py
@ -187,9 +187,13 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        # In ARM64, only branches have the conditional code in the instruction,
        # as opposed to ARM32 which allows most instructions to be conditional
        if instruction.id == ARM64_INS_B:
-            flags = super()._read_register_name(instruction, "cpsr", emu)
-            if flags is not None:
-                return resolve_condition(instruction.cs_insn.cc, flags)
+            # The B instruction can be made conditional by the condition codes
+            if instruction.cs_insn.cc in (ARM64_CC_INVALID, ARM64_CC_AL):
+                instruction.declare_conditional = False
+            else:
+                flags = super()._read_register_name(instruction, "cpsr", emu)
+                if flags is not None:
+                    return resolve_condition(instruction.cs_insn.cc, flags)

        elif instruction.id == ARM64_INS_CBNZ:
            op_val = instruction.operands[0].before_value
@ -222,7 +226,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        return super()._condition(instruction, emu)

    @override
-    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
        if not bool(instruction.groups_set & ALL_JUMP_GROUPS):
            return None

@ -233,7 +237,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
            # If this is a ret WITHOUT an operand, it means we should read from the LR/x30 register
            return super()._read_register_name(instruction, "lr", emu)

-        return super()._resolve_target(instruction, emu, call)
+        return super()._resolve_target(instruction, emu)

    @override
    def _parse_memory(
--- a/pwndbg/gdblib/disasm/arch.py
+++ b/pwndbg/gdblib/disasm/arch.py
@ -302,7 +302,7 @@ class DisassemblyAssistant:
                    )

        # Execute the instruction
-        if jump_emu and None in jump_emu.single_step(check_instruction_valid=False):
+        if jump_emu and None in jump_emu.single_step():
            # This branch is taken if stepping the emulator failed
            jump_emu = None
            emu = None
@ -627,7 +627,10 @@ class DisassemblyAssistant:
        # There are cases where the Unicorn emulator is incorrect - for example, delay slots in MIPS causing jumps to not resolve correctly
        # due to the way we single-step the emulator. We want our own manual checks to override the emulator

-        if instruction.condition == InstructionCondition.TRUE or instruction.is_unconditional_jump:
+        if not instruction.call_like and (
+            instruction.condition == InstructionCondition.TRUE or instruction.is_unconditional_jump
+        ):
+            # Don't allow call instructions - we want the actual "nexti" address
            # If condition is true, then this might be a conditional jump
            # There are some other instructions that run conditionally though - resolve_target returns None in those cases
            # Or, if this is a unconditional jump, we will try to resolve target
@ -645,9 +648,9 @@ class DisassemblyAssistant:
        if next_addr is None:
            next_addr = instruction.address + instruction.size

-        # Determine the target of this address. This is the address that the instruction could change the program counter to.
-        # allowing call instructions
-        instruction.target = self._resolve_target(instruction, emu, call=True)
+        # Determine the target of this address.
+        # This is the address that the instruction could potentially change the program counter to, meaning that `stepi` would go to the target
+        instruction.target = self._resolve_target(instruction, emu)

        instruction.next = next_addr & pwndbg.gdblib.arch.ptrmask

@ -667,19 +670,15 @@ class DisassemblyAssistant:

    # This is the default implementation.
    # Subclasses should override this for more accurate behavior/to catch more cases. See x86.py as example
-    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
        """
        Architecture-specific hook point for _enhance_next.

-        Returns the value of the instruction pointer assuming this instruction executes (and any conditional jumps are taken)
-
-        "call" specifies if we allow this to resolve call instruction targets
+        Returns the program counter target of this instruction.
+        Even in the case of conditional jumps, the potential target should be resolved.
        """

-        if instruction.call_like:
-            if not call:
-                return None
-        elif not bool(instruction.groups_set & FORWARD_JUMP_GROUP):
+        if not bool(instruction.groups_set & FORWARD_JUMP_GROUP):
            return None

        addr = None
--- a/pwndbg/gdblib/disasm/arm.py
+++ b/pwndbg/gdblib/disasm/arm.py
@ -91,6 +91,8 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
    @override
    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
        if instruction.cs_insn.cc == ARM_CC_AL:
+            if instruction.id in (ARM_INS_B, ARM_INS_BL, ARM_INS_BLX, ARM_INS_BX, ARM_INS_BXJ):
+                instruction.declare_conditional = False
            return InstructionCondition.UNDETERMINED

        # We can't reason about anything except the current instruction
@ -131,8 +133,8 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        return InstructionCondition.TRUE if bool(cc) else InstructionCondition.FALSE

    @override
-    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
-        target = super()._resolve_target(instruction, emu, call)
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
+        target = super()._resolve_target(instruction, emu)
        if target is not None:
            # On interworking branches - branches that can enable Thumb mode - the target of a jump
            # has the least significant bit set to 1. This is not actually written to the PC
--- a/pwndbg/gdblib/disasm/instruction.py
+++ b/pwndbg/gdblib/disasm/instruction.py
@ -15,11 +15,6 @@ from capstone import CS_AC
 from capstone import CS_GRP
 from capstone import CS_OP
 from capstone import *  # noqa: F403
-from capstone.arm import ARM_INS_B
-from capstone.arm import ARM_INS_BL
-from capstone.arm import ARM_INS_BLX
-from capstone.arm import ARM_INS_BX
-from capstone.arm import ARM_INS_BXJ
 from capstone.arm import ARM_INS_TBB
 from capstone.arm import ARM_INS_TBH

@ -38,6 +33,10 @@ from capstone.ppc import PPC_INS_B
 from capstone.ppc import PPC_INS_BA
 from capstone.ppc import PPC_INS_BL
 from capstone.ppc import PPC_INS_BLA
+from capstone.riscv import RISCV_INS_C_J
+from capstone.riscv import RISCV_INS_C_JAL
+from capstone.riscv import RISCV_INS_C_JALR
+from capstone.riscv import RISCV_INS_C_JR
 from capstone.riscv import RISCV_INS_JAL
 from capstone.riscv import RISCV_INS_JALR
 from capstone.sparc import SPARC_INS_JMP
@ -53,16 +52,18 @@ UNCONDITIONAL_JUMP_INSTRUCTIONS: Dict[int, Set[int]] = {
    CS_ARCH_MIPS: {MIPS_INS_J, MIPS_INS_JR, MIPS_INS_JAL, MIPS_INS_JALR, MIPS_INS_BAL, MIPS_INS_B},
    CS_ARCH_SPARC: {SPARC_INS_JMP, SPARC_INS_JMPL},
    CS_ARCH_ARM: {
-        ARM_INS_B,
-        ARM_INS_BL,
-        ARM_INS_BLX,
-        ARM_INS_BX,
-        ARM_INS_BXJ,
        ARM_INS_TBB,
        ARM_INS_TBH,
    },
    CS_ARCH_ARM64: {ARM64_INS_BL, ARM64_INS_BLR, ARM64_INS_BR},
-    CS_ARCH_RISCV: {RISCV_INS_JAL, RISCV_INS_JALR},
+    CS_ARCH_RISCV: {
+        RISCV_INS_JAL,
+        RISCV_INS_JALR,
+        RISCV_INS_C_JAL,
+        RISCV_INS_C_JALR,
+        RISCV_INS_C_J,
+        RISCV_INS_C_JR,
+    },
    CS_ARCH_PPC: {PPC_INS_B, PPC_INS_BA, PPC_INS_BL, PPC_INS_BLA},
 }

@ -232,6 +233,21 @@ class PwndbgInstruction:
        FALSE if the instruction has a conditional action, and we know it is not taken.
        """

+        self.declare_conditional: bool | None = None
+        """
+        This field is used to declare if the instruction is a conditional instruction.
+        In most cases, we can determine this purely based on the instruction ID, and this field is irrelevant.
+        However, in some arches, like Arm, the same instruction can be made conditional by certain instruction attributes.
+        Ex:
+            Arm, `bls` instruction. This is encoded as a `b` (Capstone ID 11) under the hood, with an additional condition code field.
+            In this case, sometimes a `b` instruction (ID 11) is unconditional (always branches), in other cases it is conditional.
+            We use this field to disambiguate these cases.
+
+        True if we manually determine this instruction is a conditional instruction
+        False if it's not a conditional instruction
+        None if we don't have a determination (most cases)
+        """
+
        self.annotation: str | None = None
        """
        The string is set in the "DisassemblyAssistant.enhance" function.
@ -304,7 +320,6 @@ class PwndbgInstruction:
        """
        True if we have determined that this instruction can explicitly change the program counter, and
        it's a JUMP-type instruction.
-
        """
        # The second check ensures that if the target address is itself, it's a jump (infinite loop) and not something like `rep movsb` which repeats the same instruction.
        # Because capstone doesn't catch ALL cases of an instruction changing the PC, we don't have the `jump_like` in the first part of this check.
@ -320,7 +335,8 @@ class PwndbgInstruction:
        This is used, in part, to determine if the instruction deserves a "checkmark" in the disasm view
        """
        return (
-            bool(self.groups_set & GENERIC_JUMP_GROUPS)
+            self.declare_conditional is not False
+            and bool(self.groups_set & GENERIC_JUMP_GROUPS)
            and self.id not in UNCONDITIONAL_JUMP_INSTRUCTIONS[self.cs_insn._cs.arch]
        )

@ -391,6 +407,7 @@ class PwndbgInstruction:
        Operands: [{operands_str}]
        Conditional jump: {self.is_conditional_jump}. Taken: {self.is_conditional_jump_taken}
        Unconditional jump: {self.is_unconditional_jump}
+        Declare unconditional: {self.declare_conditional}
        Can change PC: {self.has_jump_target}
        Syscall: {self.syscall if self.syscall is not None else ""} {self.syscall_name if self.syscall_name is not None else "N/A"}
        Causes Delay slot: {self.causes_branch_delay}
--- a/pwndbg/gdblib/disasm/mips.py
+++ b/pwndbg/gdblib/disasm/mips.py
@ -163,13 +163,13 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        return InstructionCondition.TRUE if conditional else InstructionCondition.FALSE

    @override
-    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
        if bool(instruction.groups_set & FORWARD_JUMP_GROUP) and not bool(
            instruction.groups_set & BRANCH_LIKELY_INSTRUCTIONS
        ):
            instruction.causes_branch_delay = True

-        return super()._resolve_target(instruction, emu, call)
+        return super()._resolve_target(instruction, emu)

    @override
    def _parse_memory(
--- a/pwndbg/gdblib/disasm/riscv.py
+++ b/pwndbg/gdblib/disasm/riscv.py
@ -116,6 +116,9 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        else:
            src2_unsigned = 0

+        if src1_unsigned is None or src2_unsigned is None:
+            return InstructionCondition.UNDETERMINED
+
        src1_signed = bit_math.to_signed(src1_unsigned, pwndbg.gdblib.arch.ptrsize * 8)
        src2_signed = bit_math.to_signed(src2_unsigned, pwndbg.gdblib.arch.ptrsize * 8)

@ -137,19 +140,13 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):

    @override
    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
-        """Checks if the current instruction is a jump that is taken.
-        Returns None if the instruction is executed unconditionally,
-        True if the instruction is executed for sure, False otherwise.
+        """
+        Checks if the current instruction is a jump that is taken.
        """
        # JAL / JALR is unconditional
        if RISCV_GRP_CALL in instruction.groups:
            return InstructionCondition.UNDETERMINED

-        # We can't reason about anything except the current instruction
-        # as the comparison result is dependent on the register state.
-        if instruction.address != pwndbg.gdblib.regs.pc:
-            return InstructionCondition.UNDETERMINED
-
        # Determine if the conditional jump is taken
        if RISCV_GRP_BRANCH_RELATIVE in instruction.groups:
            return self._is_condition_taken(instruction, emu)
@ -157,37 +154,32 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        return InstructionCondition.UNDETERMINED

    @override
-    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
        """Return the address of the jump / conditional jump,
        None if the next address is not dependent on instruction.
        """
        ptrmask = pwndbg.gdblib.arch.ptrmask
        # JAL is unconditional and independent of current register status
-        if instruction.id in [RISCV_INS_JAL, RISCV_INS_C_JAL]:
+        if instruction.id in (RISCV_INS_JAL, RISCV_INS_C_JAL, RISCV_INS_C_J):
            # But that doesn't apply to ARM anyways :)
            return (instruction.address + instruction.op_find(CS_OP_IMM, 1).imm) & ptrmask

-        # We can't reason about anything except the current instruction
-        # as the comparison result is dependent on the register state.
-        if instruction.address != pwndbg.gdblib.regs.pc:
-            return None
-
-        # Determine if the conditional jump is taken
-        if RISCV_GRP_BRANCH_RELATIVE in instruction.groups and self._is_condition_taken(
-            instruction, emu
-        ):
+        # Determine target of branch - all of them are offset to address
+        if RISCV_GRP_BRANCH_RELATIVE in instruction.groups:
            return (instruction.address + instruction.op_find(CS_OP_IMM, 1).imm) & ptrmask

        # Determine the target address of the indirect jump
-        if instruction.id in [RISCV_INS_JALR, RISCV_INS_C_JALR]:
-            target = instruction.op_find(CS_OP_REG, 1).before_value
+        if instruction.id in (RISCV_INS_JALR, RISCV_INS_C_JALR):
+            if (target := instruction.op_find(CS_OP_REG, 1).before_value) is None:
+                return None
+
            if instruction.id == RISCV_INS_JALR:
                target += instruction.op_find(CS_OP_IMM, 1).imm
            target &= ptrmask
            # Clear the lowest bit without knowing the register width
            return target ^ (target & 1)

-        return super()._resolve_target(instruction, emu, call)
+        return super()._resolve_target(instruction, emu)

    @override
    def _parse_memory(
--- a/pwndbg/gdblib/disasm/x86.py
+++ b/pwndbg/gdblib/disasm/x86.py
@ -310,14 +310,14 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        return base + op.mem.disp + scale

    @override
-    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
        # Only handle 'ret', otherwise fallback to default implementation
        if X86_INS_RET != instruction.id or len(instruction.operands) > 1:
-            return super()._resolve_target(instruction, emu, call=call)
+            return super()._resolve_target(instruction, emu)

        # Stop disassembling at RET if we won't know where it goes to without emulation
        if instruction.address != pwndbg.gdblib.regs.pc:
-            return super()._resolve_target(instruction, emu, call=call)
+            return super()._resolve_target(instruction, emu)

        # Otherwise, resolve the return on the stack
        pop = instruction.operands[0].before_value if instruction.operands else 0