Minor Annotations Improvements (#2364)

* emulator banned instructions and interrupt debug statement

* debogusify target resolving call/not-call logic

* Fix all RISC-V branches

* Fix arm branches always having checkmarks and being unrolled in wrong conditions

* Better splits, AArch64 correct conditional branches

* lint

* remove local variable

* remove unused  method
pull/2366/head
OBarronCS 1 year ago committed by GitHub
parent 9e7c41e4b5
commit c86dc797ee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -171,6 +171,13 @@ arch_to_SYSCALL = {
U.UC_ARCH_RISCV: [C.riscv_const.RISCV_INS_ECALL],
}
# We stop emulation when hitting these instructions, since they depend on co-processors or other information
# unavailable to the emulator
BANNED_INSTRUCTIONS = {
"mips": {C.mips.MIPS_INS_RDHWR},
"arm": {C.arm.ARM_INS_MRC, C.arm.ARM_INS_MRRC, C.arm.ARM_INS_MRC2, C.arm.ARM_INS_MRRC2},
}
# https://github.com/unicorn-engine/unicorn/issues/550
blacklisted_regs = ["ip", "cs", "ds", "es", "fs", "gs", "ss"]
@ -626,7 +633,8 @@ class Emulator:
"""
We never want to emulate through an interrupt. Just stop.
"""
debug(DEBUG_INTERRUPT, "Got an interrupt")
debug(DEBUG_INTERRUPT, "Got an interrupt - %d", intno)
self.valid = False
self.uc.emu_stop()
def get_reg_enum(self, reg: str) -> int | None:
@ -696,8 +704,7 @@ class Emulator:
# and set the least significant bit of the PC to 1 if the bit is 1 in order to enable Thumb mode
# for the execution of the next instruction. If this `emulate_with_hook` executes multiple instructions
# which have Thumb mode transitions, Unicorn will internally handle them.
thumb_bit = self.read_thumb_bit()
pc |= thumb_bit
pc |= self.read_thumb_bit()
try:
self.emu_start(pc, 0, count=count)
@ -790,14 +797,11 @@ class Emulator:
)
self.until_syscall_address = address
def single_step(self, pc=None, check_instruction_valid=True) -> Tuple[int, int]:
def single_step(self, pc=None) -> Tuple[int, int]:
"""Steps one instruction.
Yields:
Each iteration, yields a tuple of (address_just_executed, instruction_size).=
A StopIteration is raised if a fault or syscall or call instruction
is encountered.
Each iteration, yields a tuple of (address_just_executed, instruction_size).
Returns (None, None) upon failure to execute the instruction
"""
@ -810,25 +814,28 @@ class Emulator:
pc = pc or self.pc
if check_instruction_valid:
insn = pwndbg.gdblib.disasm.one_raw(pc)
insn = pwndbg.gdblib.disasm.one_raw(pc)
# If we don't know how to disassemble, bail.
if insn is None:
debug(DEBUG_EXECUTING, "Can't disassemble instruction at %#x", pc)
return self.last_single_step_result
# If we don't know how to disassemble, bail.
if insn is None:
debug(DEBUG_EXECUTING, "Can't disassemble instruction at %#x", pc)
return self.last_single_step_result
debug(
DEBUG_EXECUTING,
"# Emulator attempting to single-step at %#x: %s %s",
(pc, insn.mnemonic, insn.op_str),
)
else:
debug(DEBUG_EXECUTING, "# Emulator attempting to single-step at %#x", (pc,))
if insn.id in BANNED_INSTRUCTIONS.get(self.arch, {}):
debug(DEBUG_EXECUTING, "Hit illegal instruction at %#x", pc)
return self.last_single_step_result
debug(
DEBUG_EXECUTING,
"# Emulator attempting to single-step at %#x: %s %s",
(pc, insn.mnemonic, insn.op_str),
)
try:
self.single_step_hook_hit_count = 0
self.emulate_with_hook(self.single_step_hook_code, count=1)
if not self.valid:
return InstructionExecutedResult(None, None)
# If above call does not throw an Exception, we successfully executed the instruction
self.last_pc = pc

@ -386,6 +386,8 @@ def near(
while insn is not None and len(insns) < instructions:
if DEBUG_ENHANCEMENT:
print(f"Got instruction from cache, addr={cached:#x}")
if insn.jump_like and insn.split == SplitType.NO_SPLIT:
insn.split = SplitType.BRANCH_NOT_TAKEN
insns.append(insn)
cached = backward_cache[insn.address]
insn = one(cached, from_cache=use_cache, put_backward_cache=False) if cached else None
@ -423,7 +425,7 @@ def near(
# Handle visual splits in the disasm view
# The second check here handles instructions like x86 `REP` that repeat the instruction
if insn.jump_like or insn.next == insn.address:
if insn.has_jump_target or insn.next == insn.address:
split_insn = insn
# If this instruction has a delay slot, disassemble the delay slot instruction

@ -187,9 +187,13 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
# In ARM64, only branches have the conditional code in the instruction,
# as opposed to ARM32 which allows most instructions to be conditional
if instruction.id == ARM64_INS_B:
flags = super()._read_register_name(instruction, "cpsr", emu)
if flags is not None:
return resolve_condition(instruction.cs_insn.cc, flags)
# The B instruction can be made conditional by the condition codes
if instruction.cs_insn.cc in (ARM64_CC_INVALID, ARM64_CC_AL):
instruction.declare_conditional = False
else:
flags = super()._read_register_name(instruction, "cpsr", emu)
if flags is not None:
return resolve_condition(instruction.cs_insn.cc, flags)
elif instruction.id == ARM64_INS_CBNZ:
op_val = instruction.operands[0].before_value
@ -222,7 +226,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return super()._condition(instruction, emu)
@override
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
if not bool(instruction.groups_set & ALL_JUMP_GROUPS):
return None
@ -233,7 +237,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
# If this is a ret WITHOUT an operand, it means we should read from the LR/x30 register
return super()._read_register_name(instruction, "lr", emu)
return super()._resolve_target(instruction, emu, call)
return super()._resolve_target(instruction, emu)
@override
def _parse_memory(

@ -302,7 +302,7 @@ class DisassemblyAssistant:
)
# Execute the instruction
if jump_emu and None in jump_emu.single_step(check_instruction_valid=False):
if jump_emu and None in jump_emu.single_step():
# This branch is taken if stepping the emulator failed
jump_emu = None
emu = None
@ -627,7 +627,10 @@ class DisassemblyAssistant:
# There are cases where the Unicorn emulator is incorrect - for example, delay slots in MIPS causing jumps to not resolve correctly
# due to the way we single-step the emulator. We want our own manual checks to override the emulator
if instruction.condition == InstructionCondition.TRUE or instruction.is_unconditional_jump:
if not instruction.call_like and (
instruction.condition == InstructionCondition.TRUE or instruction.is_unconditional_jump
):
# Don't allow call instructions - we want the actual "nexti" address
# If condition is true, then this might be a conditional jump
# There are some other instructions that run conditionally though - resolve_target returns None in those cases
# Or, if this is an unconditional jump, we will try to resolve the target
@ -645,9 +648,9 @@ class DisassemblyAssistant:
if next_addr is None:
next_addr = instruction.address + instruction.size
# Determine the target of this address. This is the address that the instruction could change the program counter to.
# allowing call instructions
instruction.target = self._resolve_target(instruction, emu, call=True)
# Determine the target of this address.
# This is the address that the instruction could potentially change the program counter to, meaning that `stepi` would go to the target
instruction.target = self._resolve_target(instruction, emu)
instruction.next = next_addr & pwndbg.gdblib.arch.ptrmask
@ -667,19 +670,15 @@ class DisassemblyAssistant:
# This is the default implementation.
# Subclasses should override this for more accurate behavior/to catch more cases. See x86.py as example
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
"""
Architecture-specific hook point for _enhance_next.
Returns the value of the instruction pointer assuming this instruction executes (and any conditional jumps are taken)
"call" specifies if we allow this to resolve call instruction targets
Returns the program counter target of this instruction.
Even in the case of conditional jumps, the potential target should be resolved.
"""
if instruction.call_like:
if not call:
return None
elif not bool(instruction.groups_set & FORWARD_JUMP_GROUP):
if not bool(instruction.groups_set & FORWARD_JUMP_GROUP):
return None
addr = None

@ -91,6 +91,8 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
@override
def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
if instruction.cs_insn.cc == ARM_CC_AL:
if instruction.id in (ARM_INS_B, ARM_INS_BL, ARM_INS_BLX, ARM_INS_BX, ARM_INS_BXJ):
instruction.declare_conditional = False
return InstructionCondition.UNDETERMINED
# We can't reason about anything except the current instruction
@ -131,8 +133,8 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return InstructionCondition.TRUE if bool(cc) else InstructionCondition.FALSE
@override
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
target = super()._resolve_target(instruction, emu, call)
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
target = super()._resolve_target(instruction, emu)
if target is not None:
# On interworking branches - branches that can enable Thumb mode - the target of a jump
# has the least significant bit set to 1. This is not actually written to the PC

@ -15,11 +15,6 @@ from capstone import CS_AC
from capstone import CS_GRP
from capstone import CS_OP
from capstone import * # noqa: F403
from capstone.arm import ARM_INS_B
from capstone.arm import ARM_INS_BL
from capstone.arm import ARM_INS_BLX
from capstone.arm import ARM_INS_BX
from capstone.arm import ARM_INS_BXJ
from capstone.arm import ARM_INS_TBB
from capstone.arm import ARM_INS_TBH
@ -38,6 +33,10 @@ from capstone.ppc import PPC_INS_B
from capstone.ppc import PPC_INS_BA
from capstone.ppc import PPC_INS_BL
from capstone.ppc import PPC_INS_BLA
from capstone.riscv import RISCV_INS_C_J
from capstone.riscv import RISCV_INS_C_JAL
from capstone.riscv import RISCV_INS_C_JALR
from capstone.riscv import RISCV_INS_C_JR
from capstone.riscv import RISCV_INS_JAL
from capstone.riscv import RISCV_INS_JALR
from capstone.sparc import SPARC_INS_JMP
@ -53,16 +52,18 @@ UNCONDITIONAL_JUMP_INSTRUCTIONS: Dict[int, Set[int]] = {
CS_ARCH_MIPS: {MIPS_INS_J, MIPS_INS_JR, MIPS_INS_JAL, MIPS_INS_JALR, MIPS_INS_BAL, MIPS_INS_B},
CS_ARCH_SPARC: {SPARC_INS_JMP, SPARC_INS_JMPL},
CS_ARCH_ARM: {
ARM_INS_B,
ARM_INS_BL,
ARM_INS_BLX,
ARM_INS_BX,
ARM_INS_BXJ,
ARM_INS_TBB,
ARM_INS_TBH,
},
CS_ARCH_ARM64: {ARM64_INS_BL, ARM64_INS_BLR, ARM64_INS_BR},
CS_ARCH_RISCV: {RISCV_INS_JAL, RISCV_INS_JALR},
CS_ARCH_RISCV: {
RISCV_INS_JAL,
RISCV_INS_JALR,
RISCV_INS_C_JAL,
RISCV_INS_C_JALR,
RISCV_INS_C_J,
RISCV_INS_C_JR,
},
CS_ARCH_PPC: {PPC_INS_B, PPC_INS_BA, PPC_INS_BL, PPC_INS_BLA},
}
@ -232,6 +233,21 @@ class PwndbgInstruction:
FALSE if the instruction has a conditional action, and we know it is not taken.
"""
self.declare_conditional: bool | None = None
"""
This field is used to declare if the instruction is a conditional instruction.
In most cases, we can determine this purely based on the instruction ID, and this field is irrelevant.
However, in some arches, like Arm, the same instruction can be made conditional by certain instruction attributes.
Ex:
Arm, `bls` instruction. This is encoded as a `b` (Capstone ID 11) under the hood, with an additional condition code field.
In this case, sometimes a `b` instruction (ID 11) is unconditional (always branches), in other cases it is conditional.
We use this field to disambiguate these cases.
True if we manually determine this instruction is a conditional instruction
False if it's not a conditional instruction
None if we don't have a determination (most cases)
"""
self.annotation: str | None = None
"""
The string is set in the "DisassemblyAssistant.enhance" function.
@ -304,7 +320,6 @@ class PwndbgInstruction:
"""
True if we have determined that this instruction can explicitly change the program counter, and
it's a JUMP-type instruction.
"""
# The second check ensures that if the target address is itself, it's a jump (infinite loop) and not something like `rep movsb` which repeats the same instruction.
# Because capstone doesn't catch ALL cases of an instruction changing the PC, we don't have the `jump_like` in the first part of this check.
@ -320,7 +335,8 @@ class PwndbgInstruction:
This is used, in part, to determine if the instruction deserves a "checkmark" in the disasm view
"""
return (
bool(self.groups_set & GENERIC_JUMP_GROUPS)
self.declare_conditional is not False
and bool(self.groups_set & GENERIC_JUMP_GROUPS)
and self.id not in UNCONDITIONAL_JUMP_INSTRUCTIONS[self.cs_insn._cs.arch]
)
@ -391,6 +407,7 @@ class PwndbgInstruction:
Operands: [{operands_str}]
Conditional jump: {self.is_conditional_jump}. Taken: {self.is_conditional_jump_taken}
Unconditional jump: {self.is_unconditional_jump}
Declare unconditional: {self.declare_conditional}
Can change PC: {self.has_jump_target}
Syscall: {self.syscall if self.syscall is not None else ""} {self.syscall_name if self.syscall_name is not None else "N/A"}
Causes Delay slot: {self.causes_branch_delay}

@ -163,13 +163,13 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return InstructionCondition.TRUE if conditional else InstructionCondition.FALSE
@override
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
if bool(instruction.groups_set & FORWARD_JUMP_GROUP) and not bool(
instruction.groups_set & BRANCH_LIKELY_INSTRUCTIONS
):
instruction.causes_branch_delay = True
return super()._resolve_target(instruction, emu, call)
return super()._resolve_target(instruction, emu)
@override
def _parse_memory(

@ -116,6 +116,9 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
else:
src2_unsigned = 0
if src1_unsigned is None or src2_unsigned is None:
return InstructionCondition.UNDETERMINED
src1_signed = bit_math.to_signed(src1_unsigned, pwndbg.gdblib.arch.ptrsize * 8)
src2_signed = bit_math.to_signed(src2_unsigned, pwndbg.gdblib.arch.ptrsize * 8)
@ -137,19 +140,13 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
@override
def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
"""Checks if the current instruction is a jump that is taken.
Returns None if the instruction is executed unconditionally,
True if the instruction is executed for sure, False otherwise.
"""
Checks if the current instruction is a jump that is taken.
"""
# JAL / JALR is unconditional
if RISCV_GRP_CALL in instruction.groups:
return InstructionCondition.UNDETERMINED
# We can't reason about anything except the current instruction
# as the comparison result is dependent on the register state.
if instruction.address != pwndbg.gdblib.regs.pc:
return InstructionCondition.UNDETERMINED
# Determine if the conditional jump is taken
if RISCV_GRP_BRANCH_RELATIVE in instruction.groups:
return self._is_condition_taken(instruction, emu)
@ -157,37 +154,32 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return InstructionCondition.UNDETERMINED
@override
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
"""Return the address of the jump / conditional jump,
None if the next address is not dependent on instruction.
"""
ptrmask = pwndbg.gdblib.arch.ptrmask
# JAL is unconditional and independent of current register status
if instruction.id in [RISCV_INS_JAL, RISCV_INS_C_JAL]:
if instruction.id in (RISCV_INS_JAL, RISCV_INS_C_JAL, RISCV_INS_C_J):
# But that doesn't apply to ARM anyways :)
return (instruction.address + instruction.op_find(CS_OP_IMM, 1).imm) & ptrmask
# We can't reason about anything except the current instruction
# as the comparison result is dependent on the register state.
if instruction.address != pwndbg.gdblib.regs.pc:
return None
# Determine if the conditional jump is taken
if RISCV_GRP_BRANCH_RELATIVE in instruction.groups and self._is_condition_taken(
instruction, emu
):
# Determine target of branch - all of them are offset to address
if RISCV_GRP_BRANCH_RELATIVE in instruction.groups:
return (instruction.address + instruction.op_find(CS_OP_IMM, 1).imm) & ptrmask
# Determine the target address of the indirect jump
if instruction.id in [RISCV_INS_JALR, RISCV_INS_C_JALR]:
target = instruction.op_find(CS_OP_REG, 1).before_value
if instruction.id in (RISCV_INS_JALR, RISCV_INS_C_JALR):
if (target := instruction.op_find(CS_OP_REG, 1).before_value) is None:
return None
if instruction.id == RISCV_INS_JALR:
target += instruction.op_find(CS_OP_IMM, 1).imm
target &= ptrmask
# Clear the lowest bit without knowing the register width
return target ^ (target & 1)
return super()._resolve_target(instruction, emu, call)
return super()._resolve_target(instruction, emu)
@override
def _parse_memory(

@ -310,14 +310,14 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
return base + op.mem.disp + scale
@override
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None, call=False):
def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
# Only handle 'ret', otherwise fallback to default implementation
if X86_INS_RET != instruction.id or len(instruction.operands) > 1:
return super()._resolve_target(instruction, emu, call=call)
return super()._resolve_target(instruction, emu)
# Stop disassembling at RET if we won't know where it goes to without emulation
if instruction.address != pwndbg.gdblib.regs.pc:
return super()._resolve_target(instruction, emu, call=call)
return super()._resolve_target(instruction, emu)
# Otherwise, resolve the return on the stack
pop = instruction.operands[0].before_value if instruction.operands else 0

Loading…
Cancel
Save