diff --git a/pwndbg/__init__.py b/pwndbg/__init__.py index 0fdd26fc6..89c915117 100644 --- a/pwndbg/__init__.py +++ b/pwndbg/__init__.py @@ -3,6 +3,14 @@ import gdb import pwndbg.arch import pwndbg.arguments +import pwndbg.disasm +import pwndbg.disasm.arm +import pwndbg.disasm.jump +import pwndbg.disasm.mips +import pwndbg.disasm.ppc +import pwndbg.disasm.sparc +import pwndbg.disasm.x86 + import pwndbg.vmmap import pwndbg.dt import pwndbg.memory @@ -31,6 +39,7 @@ import pwndbg.commands.rop import pwndbg.commands.shell import pwndbg.commands.aslr import pwndbg.commands.misc +import pwndbg.commands.next __all__ = [ 'arch', @@ -93,9 +102,8 @@ handle SIGALRM print nopass handle SIGSEGV stop print nopass """.strip() % prompt -for line in pre_commands.splitlines(): - if line: - gdb.execute(line) +for line in pre_commands.strip().splitlines(): + gdb.execute(line) msg = "Loaded %i commands. Type pwndbg for a list." % len(pwndbg.commands._Command.commands) print(pwndbg.color.red(msg)) diff --git a/pwndbg/arch.py b/pwndbg/arch.py index eb0fcb88c..e446ed90a 100644 --- a/pwndbg/arch.py +++ b/pwndbg/arch.py @@ -1,17 +1,23 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import collections import struct import sys import gdb import pwndbg.events import pwndbg.memoize +import pwndbg.memory +import pwndbg.regs import pwndbg.typeinfo +from capstone import * + current = 'i386' ptrmask = 0xfffffffff endian = 'little' ptrsize = pwndbg.typeinfo.ptrsize fmt = '=i' -disasm = lambda: None def fix_arch(arch): arches = ['x86-64', 'i386', 'mips', 'powerpc', 'sparc', 'arm', 'aarch64', arch] @@ -37,11 +43,15 @@ def update(): (8, 'big'): '>Q', }.get((m.ptrsize, m.endian)) - m.disasm = gdb.selected_frame().architecture().disassemble - - def pack(integer): return struct.pack(fmt, integer & ptrmask) def unpack(data): return struct.unpack(fmt, data)[0] + +def signed(integer): + return unpack(pack(integer), signed=True) + +def unsigned(integer): + return unpack(pack(integer)) + diff --git 
a/pwndbg/arguments.py b/pwndbg/arguments.py index 9c9ee86fb..4fb212b9f 100644 --- a/pwndbg/arguments.py +++ b/pwndbg/arguments.py @@ -45,7 +45,7 @@ ida_replacements = { } -def arguments(instruction): +def get(instruction): """ Returns an array containing the arguments to the current function, if $pc is a 'call' or 'bl' type instruction. @@ -55,13 +55,19 @@ def arguments(instruction): if instruction.address != pwndbg.regs.pc: return [] - if not instruction.target: + if CS_GRP_CALL not in instruction.groups: return [] - if CS_GRP_CALL not in instruction.groups: + # Not sure of any OS which allows multiple operands on + # a call instruction. + assert len(instruction.operands) == 1 + + target = instruction.operands[0].int + + if not target: return [] - sym = pwndbg.symbol.get(instruction.target) + sym = pwndbg.symbol.get(target) if not sym: return [] @@ -75,8 +81,8 @@ def arguments(instruction): args = [] # Try to grab the data out of IDA - if not func and instruction.target: - typename = pwndbg.ida.GetType(instruction.target) + if not func and target: + typename = pwndbg.ida.GetType(target) if typename: typename += ';' diff --git a/pwndbg/color.py b/pwndbg/color.py index 5b7de3cbf..a73b0ba2f 100644 --- a/pwndbg/color.py +++ b/pwndbg/color.py @@ -17,7 +17,6 @@ UNDERLINE = "\x1b[4m" STACK = YELLOW HEAP = BLUE CODE = RED -RWX = RED + BOLD + UNDERLINE DATA = PURPLE def normal(x): return NORMAL + x diff --git a/pwndbg/commands/context.py b/pwndbg/commands/context.py index 343440248..1b1f63b9d 100644 --- a/pwndbg/commands/context.py +++ b/pwndbg/commands/context.py @@ -88,7 +88,7 @@ def context_code(): # If we didn't disassemble backward, try to make sure # that the amount of screen space taken is roughly constant. 
while len(result) < 11: - result.insert(0, '') + result.append('') return banner + result diff --git a/pwndbg/commands/nearpc.py b/pwndbg/commands/nearpc.py index 66700db5f..00949763d 100644 --- a/pwndbg/commands/nearpc.py +++ b/pwndbg/commands/nearpc.py @@ -61,8 +61,9 @@ def nearpc(pc=None, lines=None, to_string=False): for i,s in enumerate(symbols): symbols[i] = s.ljust(longest_sym) - # Print out each instruction prev = None + + # Print out each instruction for i,s in zip(instructions, symbols): asm = pwndbg.disasm.color.instruction(i) prefix = ' =>' if i.address == pc else ' ' @@ -73,26 +74,27 @@ def nearpc(pc=None, lines=None, to_string=False): line = ' '.join((prefix, "%#x" % i.address, s or '', asm)) - old, prev = prev, i - - # Put an ellipsis between discontiguous code groups - if not old: - pass - elif old.address + old.size != i.address: + # If there was a branch before this instruction which was not + # contiguous, put in some ellipses. + if prev and prev.address + prev.size != i.address: result.append('...') - # Put an empty line after fall-through basic blocks - elif any(g in old.groups for g in (CS_GRP_CALL, CS_GRP_JUMP, CS_GRP_RET)): + + # Otherwise if it's a branch and it *is* contiguous, just put + # and empty line. + elif prev and any(g in prev.groups for g in (CS_GRP_CALL, CS_GRP_JUMP, CS_GRP_RET)): result.append('') result.append(line) # For call instructions, attempt to resolve the target and # determine the number of arguments. 
- for arg, value in pwndbg.arguments.arguments(i): + for arg, value in pwndbg.arguments.get(i): code = False if arg.type == 'char' else True pretty = pwndbg.chain.format(value, code=code) result.append('%8s%-10s %s' % ('',arg.name+':', pretty)) + prev = i + if not to_string: print('\n'.join(result)) diff --git a/pwndbg/commands/next.py b/pwndbg/commands/next.py new file mode 100644 index 000000000..f3d73d888 --- /dev/null +++ b/pwndbg/commands/next.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Stepping until an event occurs +""" +import gdb +import pwndbg.commands +import pwndbg.next + +@pwndbg.commands.Command +@pwndbg.commands.OnlyWhenRunning +def nextjmp(*args): + """Breaks at the next jump instruction""" + if pwndbg.next.break_next_branch(): + pwndbg.commands.context.context() + +@pwndbg.commands.Command +@pwndbg.commands.OnlyWhenRunning +def nextj(*args): + """Breaks at the next jump instruction""" + nextjmp(*args) + +@pwndbg.commands.Command +@pwndbg.commands.OnlyWhenRunning +def nextjump(*args): + """Breaks at the next jump instruction""" + nextjmp(*args) + +@pwndbg.commands.Command +@pwndbg.commands.OnlyWhenRunning +def nextcall(*args): + """Breaks at the next call instruction""" + if pwndbg.next.break_next_call(): + pwndbg.commands.context.context() + +@pwndbg.commands.Command +@pwndbg.commands.OnlyWhenRunning +def nextc(*args): + """Breaks at the next call instruction""" + nextcall(*args) + + +@pwndbg.commands.Command +@pwndbg.commands.OnlyWhenRunning +def stepover(*args): + """Sets a breakpoint on the instruction after this one""" + pwndbg.next.break_on_next(*args) + + +@pwndbg.commands.Command +@pwndbg.commands.OnlyWhenRunning +def so(*args): + stepover(*args) + diff --git a/pwndbg/commands/telescope.py b/pwndbg/commands/telescope.py index 64634d576..6bf013a74 100644 --- a/pwndbg/commands/telescope.py +++ b/pwndbg/commands/telescope.py @@ -5,6 +5,7 @@ Prints out pointer chains starting at some address in memory. 
Generally used to print out the stack or register values. """ +import collections import pwndbg.chain import pwndbg.commands import pwndbg.memory @@ -32,7 +33,9 @@ def telescope(address=None, count=8, to_string=False): address = int(address) count = int(count) - reg_values = {r:v for (r,v) in pwndbg.regs.items()} + reg_values = collections.defaultdict(lambda: []) + for reg in pwndbg.regs.common: + reg_values[pwndbg.regs[reg]].append(reg) # address = pwndbg.memory.poi(pwndbg.typeinfo.ppvoid, address) ptrsize = pwndbg.typeinfo.ptrsize @@ -43,11 +46,7 @@ def telescope(address=None, count=8, to_string=False): # Find all registers which show up in the trace regs = {} for i in range(start, stop, step): - regs[i] = [] - for reg, regval in reg_values.items(): - if i <= regval < i+ptrsize: - regs[i].append(reg) - regs[i] = ' '.join(regs[i]) + regs[i] = ' '.join(reg_values[i]) # Find the longest set of register information if regs: diff --git a/pwndbg/disasm/__init__.py b/pwndbg/disasm/__init__.py index 4889eb21a..ae22bf3eb 100644 --- a/pwndbg/disasm/__init__.py +++ b/pwndbg/disasm/__init__.py @@ -8,12 +8,7 @@ import collections import gdb import pwndbg.arch -import pwndbg.disasm.mips -import pwndbg.disasm.arm -import pwndbg.disasm.ppc -import pwndbg.disasm.x86 -import pwndbg.disasm.jump -import pwndbg.disasm.sparc +import pwndbg.disasm.arch import pwndbg.ida import pwndbg.memory import pwndbg.symbol @@ -49,63 +44,37 @@ for cs in CapstoneArch.values(): # # This allows us to consistently disassemble backward. VariableInstructionSizeMax = { - 'i386': 16, + 'i386': 16, 'x86-64': 16, } -backward_cache = {} - - -def get_target(instruction): - """ - Make a best effort to determine what value or memory address - is important in a given instruction. 
For example: - - - Any single-operand instruction ==> that value - - push rax ==> evaluate rax - - Jump or call ==> target address - - jmp rax ==> evaluate rax - - jmp 0xdeadbeef ==> deadbeef - - Memory load or store ==> target address - - mov [eax], ebx ==> evaluate eax - - Register move ==> source value - - mov eax, ebx ==> evaluate ebx - - Register manipulation ==> value after execution* - - lea eax, [ebx*4] ==> evaluate ebx*4 - - Register arguments are only evaluated for the next instruction. - - Returns: - A tuple containing the resolved value (or None) and - a boolean indicating whether the value is a constant. - """ - return { - 'i386': pwndbg.disasm.x86.resolve, - 'x86-64': pwndbg.disasm.x86.resolve - }.get(pwndbg.arch.current, lambda *a: (None,None))(instruction) - +backward_cache = collections.defaultdict(lambda: 0) def get_disassembler(pc): arch = pwndbg.arch.current d = CapstoneArch[arch] if arch in ('arm', 'aarch64'): - d.mode = {0:CS_MODE_ARM,1:CS_MODE_THUMB}[pc & 1] + d.mode = {0:CS_MODE_ARM,0x20:CS_MODE_THUMB}[pwndbg.regs.cpsr & 0x20] else: d.mode = {4:CS_MODE_32, 8:CS_MODE_64}[pwndbg.arch.ptrsize] return d +@pwndbg.memoize.reset_on_cont def get_one_instruction(address): md = get_disassembler(address) size = VariableInstructionSizeMax.get(pwndbg.arch.current, 4) data = pwndbg.memory.read(address, size, partial=True) for ins in md.disasm(bytes(data), address, 1): - ins.target, ins.target_constant = get_target(ins) + pwndbg.disasm.arch.DisassemblyAssistant.enhance(ins) return ins def one(address=None): + if address == 0: + return None if address is None: address = pwndbg.regs.pc for insn in get(address, 1): + backward_cache[insn.next] = insn.address return insn def fix(i): @@ -127,49 +96,34 @@ def get(address, instructions=1): i = get_one_instruction(address) if i is None: break - backward_cache[address+i.size] = address - address += i.size + address = i.next retval.append(i) return retval def near(address, instructions=1): - # # If we have IDA, we 
can just use it to find out where the various - # # isntructions are. - # if pwndbg.ida.available(): - # head = address - # for i in range(instructions): - # head = pwndbg.ida.PrevHead(head) - - # retval = [] - # for i in range(2*instructions + 1): - # retval.append(get(head)) - # head = pwndbg.ida.NextHead(head) - - # See if we can satisfy the request based on the instruction - # length cache. - needle = address - insns = [] - while len(insns) < instructions and needle in backward_cache: - needle = backward_cache[needle] - insn = one(needle) - if not insn: - return insns - insns.insert(0, insn) - current = one(address) - if not current: - return insns - - target = current.target - - if not pwndbg.disasm.jump.is_jump_taken(current): - target = current.address + current.size + # Try to go backward by seeing which instructions we've returned + # before, which were followed by this one. + needle = address + insns = [] + insn = one(backward_cache[current.address]) + while insn and len(insns) < instructions: + insns.append(insn) + insn = one(backward_cache[insn.address]) + insns.reverse() + insns.append(current) - backward_cache[target] = address + # Now find all of the instructions moving forward. + insn = current + while insn and len(insns) < 1+(2*instructions): + # In order to avoid annoying cycles where the current instruction + # is a branch, which evaluates to true, and jumps back a short + # number of instructions. 
- insns.append(current) - insns.extend(get(target, instructions)) + insn = one(insn.next) + if insn: + insns.append(insn) return insns diff --git a/pwndbg/disasm/arch.py b/pwndbg/disasm/arch.py new file mode 100644 index 000000000..43065ef72 --- /dev/null +++ b/pwndbg/disasm/arch.py @@ -0,0 +1,225 @@ +import pwndbg.memoize +import pwndbg.symbol +import capstone +import collections +from capstone import * + +debug = False + +groups = {v:k for k,v in globals().items() if k.startswith('CS_GRP_')} +ops = {v:k for k,v in globals().items() if k.startswith('CS_OP_')} +access = {v:k for k,v in globals().items() if k.startswith('CS_AC_')} + +for value1, name1 in dict(access).items(): + for value2, name2 in dict(access).items(): + access.setdefault(value1 | value2, '%s | %s' % (name1, name2)) + + +class DisassemblyAssistant(object): + # Registry of all instances, {architecture: instance} + assistants = {} + + def __init__(self, architecture): + if architecture is not None: + self.assistants[architecture] = self + + self.op_handlers = { + CS_OP_IMM: self.immediate, + CS_OP_REG: self.register, + CS_OP_MEM: self.memory + } + + self.op_names = { + CS_OP_IMM: self.immediate_sz, + CS_OP_REG: self.register_sz, + CS_OP_MEM: self.memory_sz + } + + @staticmethod + def enhance(instruction): + enhancer = DisassemblyAssistant.assistants.get(pwndbg.arch.current, generic_assistant) + enhancer.enhance_operands(instruction) + enhancer.enhance_symbol(instruction) + enhancer.enhance_conditional(instruction) + enhancer.enhance_next(instruction) + + if debug: + print(enhancer.dump(instruction)) + + def enhance_conditional(self, instruction): + """ + Adds a ``condition`` field to the instruction. + + If the instruction is always executed unconditionally, the value + of the field is ``None``. + + If the instruction is executed conditionally, and we can be absolutely + sure that it will be executed, the value of the field is ``True``. 
+ Generally, this implies that it is the next instruction to be executed. + + In all other cases, it is set to ``False``. + """ + c = self.condition(instruction) + + if c: + c = True + elif c is not None: + c = False + + instruction.condition = c + + def condition(self, instruction): + return False + + def enhance_next(self, instruction): + """ + Adds a ``next`` field to the instruction. + + By default, it is set to the address of the next linear + instruction. + + If the instruction is a non-"call" branch and either: + + - Is unconditional + - Is conditional, but is known to be taken + + And the target can be resolved, it is set to the address + of the jump target. + """ + next_addr = None + + if instruction.condition in (True, None): + next_addr = self.next(instruction) + + if next_addr is None: + next_addr = instruction.address + instruction.size + + instruction.next = next_addr & pwndbg.arch.ptrmask + + def next(self, instruction): + """ + Architecture-specific hook point for enhance_next. + """ + if CS_GRP_JUMP not in instruction.groups: + return None + + # At this point, all operands have been resolved. + # Assume only single-operand jumps. + if len(instruction.operands) != 1: + return None + + # Memory operands must be dereferenced + addr = instruction.operands[0].int + if instruction.operands[0].type == CS_OP_MEM: + addr = int(pwndbg.memory.poi(pwndbg.typeinfo.ppvoid, addr)) + + return addr + + def enhance_symbol(self, instruction): + """ + Adds a ``symbol`` and ``symbol_addr`` fields to the instruction. + + If, after parsing all of the operands, there is exactly one + value which resolved to a named symbol, it will be set to + that value. + + In all other cases, the value is ``None``. 
+ """ + instruction.symbol = None + operands = [o for o in instruction.operands if o.symbol] + + if len(operands) != 1: + return + + o = operands[0] + + instruction.symbol = o.symbol + instruction.symbol_addr = o.int + + def enhance_operands(self, instruction): + """ + Enhances all of the operands in the instruction, by adding the following + fields: + + operand.str: + String of this operand, as it should appear in the + disassembly. + + operand.int: + Integer value of the operand, if it can be resolved. + + operand.symbol: + Resolved symbol name for this operand. + """ + current = (instruction.address == pwndbg.regs.pc) + + for i, op in enumerate(instruction.operands): + op.int = None + op.symbol = None + + op.int = self.op_handlers.get(op.type, lambda *a: None)(instruction, op) + op.str = self.op_names[op.type](instruction, op) + + if op.int: + op.symbol = pwndbg.symbol.get(op.int) + + + def immediate(self, instruction, operand): + return operand.value.imm + + def immediate_sz(self, instruction, operand): + value = operand.int + + if abs(value) < 0x10: + return "%i" % value + + return "%#x" % value + + def register(self, instruction, operand): + if instruction.address != pwndbg.regs.pc: + return None + + # # Don't care about registers which are only overwritten + # if operand.access & CS_AC_WRITE and not operand.access & CS_AC_READ: + # return None + + reg = operand.value.reg + name = instruction.reg_name(reg) + + return pwndbg.regs[name] + + def register_sz(self, instruction, operand): + reg = operand.value.reg + return instruction.reg_name(reg).lower() + + def memory(self, instruction, operand): + return None + + def memory_sz(self, instruction, operand): + raise NotImplementedError + + def dump(self, instruction): + ins = instruction + rv = [] + rv.append('%s %s' % (ins.mnemonic, ins.op_str)) + + for i, group in enumerate(ins.groups): + rv.append(' groups[%i] = %s' % (i, groups.get(group, group))) + + rv.append(' next = %#x' % (ins.next)) + rv.append(' 
condition = %r' % (ins.condition)) + + for i, op in enumerate(ins.operands): + rv.append(' operands[%i] = %s' % (i, ops.get(op.type, op.type))) + rv.append(' access = %s' % (access.get(op.access, op.access))) + + if op.int is not None: + rv.append(' int = %#x' % (op.int)) + if op.symbol is not None: + rv.append(' sym = %s' % (op.symbol)) + if op.str is not None: + rv.append(' str = %s' % (op.str)) + + return '\n'.join(rv) + +generic_assistant = DisassemblyAssistant(None) diff --git a/pwndbg/disasm/arm.py b/pwndbg/disasm/arm.py index e69de29bb..d8423d411 100644 --- a/pwndbg/disasm/arm.py +++ b/pwndbg/disasm/arm.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import collections + +import pwndbg.arch +import pwndbg.disasm.arch +import pwndbg.memory +import pwndbg.regs + +from capstone import * +from capstone.arm import * + +import pwndbg.disasm.arch + + +class DisassemblyAssistant(pwndbg.disasm.arch.DisassemblyAssistant): + def memory_sz(self, instruction, op): + segment = '' + parts = [] + + if op.mem.base != 0: + parts.append(instruction.reg_name(op.mem.base)) + + if op.mem.disp != 0: + parts.append("%#x" % op.value.mem.disp) + + if op.mem.index != 0: + index = pwndbg.regs[instruction.reg_name(op.mem.index)] + scale = op.mem.scale + parts.append("%s*%#x" % (index, scale)) + + return "[%s]" % (', '.join(parts)) + + def immediate_sz(self, instruction, operand): + return '#' + super(DisassemblyAssistant, self).immediate_sz(instruction, operand) + + def condition(self, instruction): + + # We can't reason about anything except the current instruction + if instruction.cc == ARM_CC_AL: + return None + + if instruction.address != pwndbg.regs.pc: + return False + + cpsr = pwndbg.regs.cpsr + + N = cpsr & (1<<31) + Z = cpsr & (1<<30) + C = cpsr & (1<<29) + V = cpsr & (1<<28) + + return { + ARM_CC_EQ: Z, + ARM_CC_NE: not Z, + ARM_CC_HS: C, + ARM_CC_LO: not C, + ARM_CC_MI: N, + ARM_CC_PL: not N, + ARM_CC_VS: V, + ARM_CC_VC: not V, + ARM_CC_HI: C and not Z, + 
ARM_CC_LS: Z or not C, + ARM_CC_GE: N == V, + ARM_CC_LT: N != V, + ARM_CC_GT: not Z and (N==V), + ARM_CC_LE: Z or (N != V), + }.get(instruction.id, None) + +assistant = DisassemblyAssistant('arm') + diff --git a/pwndbg/disasm/color.py b/pwndbg/disasm/color.py index 65bf67198..1ca854879 100644 --- a/pwndbg/disasm/color.py +++ b/pwndbg/disasm/color.py @@ -5,22 +5,15 @@ import pwndbg.chain import pwndbg.color import pwndbg.disasm.jump -capstone_branch_groups = [ -capstone.arm.ARM_GRP_CALL, -capstone.arm.ARM_GRP_JUMP, -capstone.arm64.ARM64_GRP_JUMP, -capstone.mips.MIPS_GRP_JUMP, -capstone.ppc.PPC_GRP_JUMP, -capstone.sparc.SPARC_GRP_JUMP, -capstone.x86.X86_GRP_CALL, -capstone.x86.X86_GRP_JUMP, -] +capstone_branch_groups = set(( +capstone.CS_GRP_CALL, +capstone.CS_GRP_JUMP +)) def instruction(ins): asm = u'%-06s %s' % (ins.mnemonic, ins.op_str) - branch = any(g in capstone_branch_groups for g in ins.groups) - taken = pwndbg.disasm.jump.is_jump_taken(ins) + branch = set(ins.groups) & capstone_branch_groups if branch: asm = pwndbg.color.bold(asm) @@ -41,9 +34,15 @@ def instruction(ins): else: asm = '%-36s <%s>' % (asm, target) - if taken: + if ins.condition: asm = pwndbg.color.green(u'✔ ') + asm else: asm = ' ' + asm + if ins.symbol: + if branch: + asm = '%s <%s>' % (asm, ins.symbol) + else: + asm = '%-50s # %s <%s>' % (asm, pwndbg.color.get(ins.symbol_addr), ins.symbol) + return asm diff --git a/pwndbg/disasm/x86.py b/pwndbg/disasm/x86.py index 03df74941..f7e396e16 100644 --- a/pwndbg/disasm/x86.py +++ b/pwndbg/disasm/x86.py @@ -1,8 +1,11 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import collections + import pwndbg.arch import pwndbg.memory import pwndbg.regs +import pwndbg.typeinfo from capstone import * from capstone.x86 import * @@ -12,131 +15,164 @@ ops = {v:k for k,v in globals().items() if k.startswith('X86_OP_')} regs = {v:k for k,v in globals().items() if k.startswith('X86_REG_')} access = {v:k for k,v in globals().items() if k.startswith('CS_AC_')} -def 
is_memory_op(op): - return op.type == X86_OP_MEM +pc = X86_REG_RSP + +class DisassemblyAssistant(pwndbg.disasm.arch.DisassemblyAssistant): + def regs(self, instruction, reg): + if reg == X86_REG_RIP: + return instruction.address + instruction.size + elif instruction.address == pwndbg.regs.pc: + name = instruction.reg_name(reg) + return pwndbg.regs[name] + else: + return None + + def memory(self, instruction, op): + current = (instruction.address == pwndbg.regs.pc) + + # The only register we can reason about if it's *not* the current + # instruction is $rip. For example: + # lea rdi, [rip - 0x1f6] + + target = 0 + + # There doesn't appear to be a good way to read from segmented + # addresses within GDB. + if op.mem.segment != 0: + return None -def get_access(ac): - rv = [] - for k,v in access.items(): - if ac & k: rv.append(v) - return ' | '.join(rv) + if op.mem.base != 0: + base = self.regs(instruction, op.mem.base) + if base is None: + return None + target += base -def dump(instruction): - ins = instruction - rv = [] - rv.append('%s %s' % (ins.mnemonic,ins.op_str)) - for i, group in enumerate(ins.groups): - rv.append(' groups[%i] = %s' % (i, groups[group])) - for i, op in enumerate(ins.operands): - rv.append(' operands[%i] = %s' % (i, ops[op.type])) - rv.append(' access = %s' % (get_access(op.access))) - return '\n'.join(rv) - -def resolve(instruction): - ops = list(instruction.operands) - - if instruction.mnemonic == 'nop' or not ops: - return (None,None) - - # 'ret', 'syscall' - if not ops: - return - - # 'jmp rax', 'call 0xdeadbeef' - if len(ops) == 1: - return get_operand_target(instruction, ops[0]) - - # 'mov eax, ebx' ==> ebx - # 'mov [eax], ebx' ==> [eax] - # 'mov eax, 0xdeadbeef' ==> 0xdeadbeef - if len(ops) == 2: - # If there are any memory operands, prefer those - for op in filter(is_memory_op, ops): - return get_operand_target(instruction, op) - - # Otherwise, prefer the 'source' operand - return get_operand_target(instruction, ops[1]) - - - 
print("Weird number of operands!!!!!") - print(dump(instruction)) - -def get_operand_target(instruction, op): - current = (instruction.address == pwndbg.regs.pc) - - # EB/E8/E9 or similar "call $+offset" - # Capstone handles the instruction + instruction size. - if op.type == X86_OP_IMM: - return (op.value.imm, True) - - # jmp/call REG - if op.type == X86_OP_REG: - if not current: - return (None, False) - - regname = instruction.reg_name(op.value.reg) - return (pwndbg.regs[regname], False) - - # base + disp + scale * offset - assert op.type == X86_OP_MEM, "Invalid operand type %i (%s)" % (op.type, ops[op.type]) - - target = 0 - - # Don't resolve registers - constant = bool(op.mem.base == 0 and op.mem.index == 0) - if not current and not constant: - return (None, False) - - if op.mem.segment != 0: - return (None, False) - - if op.mem.base != 0: - regname = instruction.reg_name(op.mem.base) - target += pwndbg.regs[regname] - - if op.mem.disp != 0: - target += op.value.mem.disp - - if op.mem.index != 0: - scale = op.mem.scale - index = pwndbg.regs[instruction.reg_name(op.mem.index)] - target += (scale * index) - - # for source operands, resolve - if op.access == CS_AC_READ: - try: - target = pwndbg.memory.u(target, op.size * 8) - except: - return (None, False) - - return (target, constant) - - -def is_jump_taken(instruction): - efl = pwndbg.regs.eflags - - cf = efl & (1<<0) - pf = efl & (1<<2) - af = efl & (1<<4) - zf = efl & (1<<6) - sf = efl & (1<<7) - of = efl & (1<<11) - - return { - X86_INS_JO: of, - X86_INS_JNO: not of, - X86_INS_JS: sf, - X86_INS_JNS: not sf, - X86_INS_JE: zf, - X86_INS_JNE: not zf, - X86_INS_JB: cf, - X86_INS_JAE: not cf, - X86_INS_JBE: cf or zf, - X86_INS_JA: not (cf or zf), - X86_INS_JL: sf != of, - X86_INS_JGE: sf == of, - X86_INS_JLE: zf or (sf != of), - X86_INS_JP: pf, - X86_INS_JNP: not pf, - X86_INS_JMP: True, - }.get(instruction.id, None) + if op.mem.disp != 0: + target += op.value.mem.disp + + if op.mem.index != 0: + scale = 
op.mem.scale + index = self.regs(instruction, op.mem.index) + if index is None: + return NOne + + target += (scale * index) + + return target + + def memory_sz(self, instruction, op): + arith = False + segment = op.mem.segment + disp = op.value.mem.disp + base = op.value.mem.base + index = op.value.mem.index + scale = op.value.mem.scale + sz = '' + + if segment != 0: + sz += '%s:' % instruction.reg_name(segment) + + if base != 0: + sz += instruction.reg_name(base) + arith = True + + if index != 0: + if arith: + sz += ' + ' + + index = pwndbg.regs[instruction.reg_name(index)] + sz += "%s*%#x" % (index, scale) + arith = True + + if op.mem.disp != 0: + if arith and op.mem.disp < 0: + sz += ' - ' + elif arith and op.mem.disp >= 0: + sz += ' + ' + + sz += ']' + return sz + + + def register(self, instruction, operand): + if operand.value.reg != X86_REG_RIP: + return super(DisassemblyAssistant, self).register(instruction, operand) + + return instruction.address + instruction.size + + def next(self, instruction): + # Only enhance 'ret' + if X86_INS_RET != instruction.id or len(instruction.operands) > 1: + return super(DisassemblyAssistant, self).next(instruction) + + # Stop disassembling at RET if we won't know where it goes to + if instruction.address != pwndbg.regs.pc: + return 0 + + # Otherwise, resolve the return on the stack + pop = 0 + if instruction.operands: + pop = instruction.operands[0].int + + address = (pwndbg.regs.sp) + (pwndbg.arch.ptrsize * pop) + + return int(pwndbg.memory.poi(pwndbg.typeinfo.ppvoid, address)) + + + + def condition(self, instruction): + # JMP is unconditional + if instruction.id in (X86_INS_JMP, X86_INS_RET, X86_INS_CALL): + return None + + # We can't reason about anything except the current instruction + if instruction.address != pwndbg.regs.pc: + return False + + efl = pwndbg.regs.eflags + + cf = efl & (1<<0) + pf = efl & (1<<2) + af = efl & (1<<4) + zf = efl & (1<<6) + sf = efl & (1<<7) + of = efl & (1<<11) + + return { + X86_INS_CMOVA: 
not (cf or zf), + X86_INS_CMOVAE: not cf, + X86_INS_CMOVB: cf, + X86_INS_CMOVBE: cf or zf, + X86_INS_CMOVE: zf, + X86_INS_CMOVG: not zf and (sf == of), + X86_INS_CMOVGE: sf == of, + X86_INS_CMOVL: sf != of, + X86_INS_CMOVLE: zf or (sf != of), + X86_INS_CMOVNE: not zf, + X86_INS_CMOVNO: not of, + X86_INS_CMOVNP: not pf, + X86_INS_CMOVNS: not sf, + X86_INS_CMOVO: of, + X86_INS_CMOVP: pf, + X86_INS_CMOVS: sf, + X86_INS_JA: not (cf or zf), + X86_INS_JAE: not cf, + X86_INS_JB: cf, + X86_INS_JBE: cf or zf, + X86_INS_JE: zf, + X86_INS_JG: not zf and (sf == of), + X86_INS_JGE: sf == of, + X86_INS_JL: sf != of, + X86_INS_JLE: zf or (sf != of), + X86_INS_JNE: not zf, + X86_INS_JNO: not of, + X86_INS_JNP: not pf, + X86_INS_JNS: not sf, + X86_INS_JO: of, + X86_INS_JP: pf, + X86_INS_JS: sf, + }.get(instruction.id, None) + + +assistant = DisassemblyAssistant('i386') +assistant = DisassemblyAssistant('x86-64') diff --git a/pwndbg/memoize.py b/pwndbg/memoize.py index 11ec0c249..a17f7b039 100644 --- a/pwndbg/memoize.py +++ b/pwndbg/memoize.py @@ -106,6 +106,17 @@ class reset_on_start(memoize): for obj in reset_on_start.caches: obj.clear() +class reset_on_cont(memoize): + caches = [] + kind = 'cont' + + @staticmethod + @pwndbg.events.cont + def __reset(): + for obj in reset_on_cont.caches: + obj.clear() + + class while_running(memoize): caches = [] kind = 'running' diff --git a/pwndbg/next.py b/pwndbg/next.py index 773b704f3..0df98b7e6 100644 --- a/pwndbg/next.py +++ b/pwndbg/next.py @@ -8,11 +8,53 @@ import gdb import pwndbg.disasm import pwndbg.regs +import capstone -def next_branch(callback, address=None): +jumps = set(( + capstone.CS_GRP_CALL, + capstone.CS_GRP_JUMP, + capstone.CS_GRP_RET, + capstone.CS_GRP_IRET +)) + +def next_branch(address=None): if address is None: - address = pwndbg.regs.pc + ins = pwndbg.disasm.one(pwndbg.regs.pc) + if not ins: + return None + address = ins.next + + ins = pwndbg.disasm.one(address) + while ins: + if set(ins.groups) & jumps: + return ins + 
ins = pwndbg.disasm.one(ins.next) + + return None + +def break_next_branch(address=None): + ins = next_branch(address) + + if ins: + gdb.Breakpoint("*%#x" % ins.address, internal=True, temporary=True) + gdb.execute('continue', from_tty=False, to_string=True) + return ins + +def break_next_call(address=None): + while True: + ins = break_next_branch(address) + + if not ins: + break + + if capstone.CS_GRP_CALL in ins.groups: + return ins + +def break_on_next(address=None): + address = address or pwndbg.regs.pc + ins = pwndbg.disasm.one(address) + + gdb.Breakpoint("*%#x" % (ins.address + ins.size), temporary=True) + gdb.execute('continue', from_tty=False, to_string=True) + - # Disassemble forward until we find *any* branch instruction - # Set a temporary, internal breakpoint on it so the user is - # not bothered. diff --git a/pwndbg/regs.py b/pwndbg/regs.py index d44d7cc38..73d66e809 100644 --- a/pwndbg/regs.py +++ b/pwndbg/regs.py @@ -4,6 +4,7 @@ Reading register value from the inferior, and provides a standardized interface to registers like "sp" and "pc". 
""" +import collections import re import sys from types import ModuleType @@ -13,78 +14,120 @@ import pwndbg.arch import pwndbg.compat import pwndbg.events import pwndbg.memoize +import pwndbg.proc class RegisterSet(object): - def __init__(self, pc, stack, frame, retaddr, flags, gpr, misc, args, retval): - self.pc = pc + #: Program counter register + pc = None + + #: Stack pointer register + stack = None + + #: Frame pointer register + frame = None + + #: Return address register + retaddr = None + + #: Flags register (eflags, cpsr) + flags = None + + #: List of native-size generalp-purpose registers + gpr = None + + #: List of miscellaneous, valid registers + misc = None + + #: Register-based arguments for most common ABI + regs = None + + #: Return value register + retval = None + + #: Common registers which should be displayed in the register context + common = None + + #: All valid registers + all = None + + def __init__(self, + pc='pc', + stack='sp', + frame=None, + retaddr=tuple(), + flags=tuple(), + gpr=tuple(), + misc=tuple(), + args=tuple(), + retval=None): + self.pc = pc self.stack = stack self.frame = frame self.retaddr = retaddr - self.flags = flags - self.gpr = gpr - self.misc = misc - self.args = args + self.flags = flags + self.gpr = gpr + self.misc = misc + self.args = args self.retval = retval - self.common = set(i for i in gpr + (frame, stack, pc) if i) - self.all = set(i for i in misc or tuple()) | set(flags or tuple()) | self.common + # In 'common', we don't want to lose the ordering of: + self.common = [] + for reg in gpr + (frame, stack, pc): + if reg and reg not in self.common: + self.common.append(reg) - self.common -= {None} - self.all -= {None} + self.all = set(i for i in misc) | set(flags) | set(self.common) + self.all -= {None} def __iter__(self): for r in self.all: yield r -arm = RegisterSet( 'pc', - 'sp', - None, - ('lr',), - ('cpsr',), - ('r0','r1','r2','r3','r4','r5','r6','r7','r8','r9','r10','r11','r12'), - tuple(), - 
('r0','r1','r2','r3'), - 'r0') - -aarch64 = RegisterSet('pc', - 'sp', - None, - ('lr',), - ('cpsr',), - ('x0','x1','x2','x3','x4','x5','x6','x7','x8','x9','x10','x11','x12'), - tuple(), - ('x0','x1','x2','x3'), - 'x0') - - -amd64 = RegisterSet('rip', - 'rsp', - 'rbp', - tuple(), - ('eflags',), - ('rax','rbx','rcx','rdx','rdi','rsi', - 'r8', 'r9', 'r10','r11','r12', - 'r13','r14','r15'), - ('cs','ss','ds','es','fs','gs'), - ('rdi','rsi','rdx','rcx','r8','r9'), - 'rax') - -i386 = RegisterSet('eip', - 'esp', - 'ebp', - tuple(), - ('eflags',), - ('eax','ebx','ecx','edx','edi','esi'), - ('cs','ss','ds','es','fs','gs'), - ('*((void**)$sp+0)', - '*((void**)$sp+1)', - '*((void**)$sp+2)', - '*((void**)$sp+3)', - '*((void**)$sp+4)', - '*((void**)$sp+5)', - '*((void**)$sp+6)',), - 'eax') +arm = RegisterSet( retaddr = ('lr',), + flags = ('cpsr',), + gpr = tuple('r%i' % i for i in range(13)), + args = ('r0','r1','r2','r3'), + retval = 'r0') + +aarch64 = RegisterSet( retaddr = ('lr',), + flags = ('cpsr',), + gpr = tuple('x%i' % i for i in range(32)), + misc = tuple('w%i' % i for i in range(32)), + args = ('x0','x1','x2','x3'), + retval = 'x0') + + +amd64 = RegisterSet(pc = 'rip', + stack = 'rsp', + frame = 'rbp', + flags = ('eflags',), + gpr = ('rax','rbx','rcx','rdx','rdi','rsi', + 'r8', 'r9', 'r10','r11','r12', + 'r13','r14','r15'), + misc = ('cs','ss','ds','es','fs','gs', + 'ax','ah','al', + 'bx','bh','bl', + 'cx','ch','cl', + 'dx','dh','dl', + 'dil','sil','spl','bpl', + 'di','si','bp','sp','ip'), + args = ('rdi','rsi','rdx','rcx','r8','r9'), + retval = 'rax') + +i386 = RegisterSet( pc = 'eip', + stack = 'esp', + frame = 'ebp', + flags = ('eflags',), + gpr = ('eax','ebx','ecx','edx','edi','esi'), + misc = ('cs','ss','ds','es','fs','gs', + 'ax','ah','al', + 'bx','bh','bl', + 'cx','ch','cl', + 'dx','dh','dl', + 'dil','sil','spl','bpl', + 'di','si','bp','sp','ip'), + retval = 'eax') # http://math-atlas.sourceforge.net/devel/assembly/elfspec_ppc.pdf @@ -97,15 +140,12 @@ i386 = 
RegisterSet('eip', # r13 Small data area pointer register (points to TLS) # r14-r30 Registers used for local variables # r31 Used for local variables or "environment pointers" -powerpc = RegisterSet('pc', - 'sp', - None, - ('lr','r0'), - ('msr','xer'), - tuple('r%i' % i for i in range(3,32)), - ('cr','lr','r2'), - tuple(), - 'r3') +powerpc = RegisterSet( retaddr = ('lr','r0'), + flags = ('msr','xer'), + gpr = tuple('r%i' % i for i in range(3,32)), + misc = ('cr','lr','r2'), + args = tuple('r%i' % i for i in range(3,11)), + retval = 'r3') # http://people.cs.clemson.edu/~mark/sparc/sparc_arch_desc.txt # http://people.cs.clemson.edu/~mark/subroutines/sparc.html @@ -137,15 +177,13 @@ sparc_gp = tuple(['g%i' % i for i in range(1,8)] +['o%i' % i for i in range(0,6)] +['l%i' % i for i in range(0,8)] +['i%i' % i for i in range(0,6)]) -sparc = RegisterSet('pc', - 'o6', - 'i6', - ('o7',), - ('psr',), - sparc_gp, - tuple(), - ('i0','i1','i2','i3','i4','i5'), - 'o0') +sparc = RegisterSet(stack = 'o6', + frame = 'i6', + retaddr = ('o7',), + flags = ('psr',), + gpr = sparc_gp, + args = ('i0','i1','i2','i3','i4','i5'), + retval = 'o0') # http://logos.cs.uic.edu/366/notes/mips%20quick%20tutorial.htm @@ -161,17 +199,13 @@ sparc = RegisterSet('pc', # r29 => stack pointer # r30 => frame pointer # r31 => return address -mips = RegisterSet( 'pc', - 'sp', - 'fp', - ('ra',), - tuple(), - ('v0','v1','a0','a1','a2','a3') \ - + tuple('t%i' % i for i in range(10)) \ - + tuple('s%i' % i for i in range(9)), - tuple(), - ('a0','a1','a2','a3'), - 'v0') +mips = RegisterSet( frame = 'fp', + retaddr = ('ra',), + gpr = ('v0','v1','a0','a1','a2','a3') \ + + tuple('t%i' % i for i in range(10)) \ + + tuple('s%i' % i for i in range(9)), + args = ('a0','a1','a2','a3'), + retval = 'v0') arch_to_regs = { 'i386': i386, @@ -188,19 +222,24 @@ arch_to_regs = { class module(ModuleType): last = {} + @pwndbg.memoize.reset_on_stop def __getattr__(self, attr): try: - value = gdb.parse_and_eval('$' + 
attr.lstrip('$')) - 
if 'eflags' not in attr: - value = value.cast(pwndbg.typeinfo.ptrdiff) - else: - # Seriously, gdb? Only accepts uint32. + # Seriously, gdb? Only accepts uint32. + if 'eflags' in attr: + value = gdb.parse_and_eval('$' + attr.lstrip('$')) + value = value.cast(pwndbg.typeinfo.uint32) + else: + value = gdb.newest_frame().read_register(attr.lstrip('$')) + value = value.cast(pwndbg.typeinfo.ptrdiff) + value = int(value) return value & pwndbg.arch.ptrmask - except gdb.error: + except (ValueError, gdb.error): + # Unknown register, or gdb could not read it return None + @pwndbg.memoize.reset_on_stop def __getitem__(self, item): if isinstance(item, int): return arch_to_regs[pwndbg.arch.current][item] @@ -234,6 +273,10 @@ class module(ModuleType): def gpr(self): return arch_to_regs[pwndbg.arch.current].gpr + @property + def common(self): + return arch_to_regs[pwndbg.arch.current].common + @property def frame(self): return arch_to_regs[pwndbg.arch.current].frame @@ -302,4 +345,4 @@ sys.modules[__name__] = module(__name__, '') @pwndbg.events.cont def update_last(): M = sys.modules[__name__] - M.last = {k:M[k] for k in M} + M.last = {k:M[k] for k in M.common} diff --git a/pwndbg/vmmap.py b/pwndbg/vmmap.py index 77285499b..9cdaef596 100644 --- a/pwndbg/vmmap.py +++ b/pwndbg/vmmap.py @@ -86,7 +86,7 @@ def explore(address_maybe): # Automatically ensure that all registers are explored on each stop @pwndbg.events.stop def explore_registers(): - for regname in pwndbg.regs.all: + for regname in pwndbg.regs.common: find(pwndbg.regs[regname])