diff --git a/pwndbg/aglib/disasm/disassembly.py b/pwndbg/aglib/disasm/disassembly.py
index 380759a67..b2c8e9fe8 100644
--- a/pwndbg/aglib/disasm/disassembly.py
+++ b/pwndbg/aglib/disasm/disassembly.py
@@ -24,6 +24,7 @@ import pwndbg.aglib.disasm.arm
 import pwndbg.aglib.disasm.disassembly
 import pwndbg.aglib.disasm.loongarch64
 import pwndbg.aglib.disasm.mips
+import pwndbg.aglib.disasm.ppc
 import pwndbg.aglib.disasm.riscv
 import pwndbg.aglib.disasm.x86
 import pwndbg.aglib.memory
@@ -555,6 +556,7 @@ ALL_DISASSEMBLY_ASSISTANTS: Dict[
     "loongarch64": lambda: pwndbg.aglib.disasm.loongarch64.Loong64DisassemblyAssistant(
         "loongarch64"
     ),
+    "powerpc": lambda: pwndbg.aglib.disasm.ppc.PowerPCDisassemblyAssistant("powerpc"),
 }
 
 
diff --git a/pwndbg/aglib/disasm/ppc.py b/pwndbg/aglib/disasm/ppc.py
index e69de29bb..06927f44a 100644
--- a/pwndbg/aglib/disasm/ppc.py
+++ b/pwndbg/aglib/disasm/ppc.py
@@ -0,0 +1,121 @@
+from __future__ import annotations
+
+from typing import Callable
+from typing import Dict
+
+from capstone import *  # noqa: F403
+from capstone.ppc_const import *  # noqa: F403
+from typing_extensions import override
+
+import pwndbg.aglib.disasm.arch
+from pwndbg.aglib.disasm.instruction import InstructionCondition
+from pwndbg.aglib.disasm.instruction import PwndbgInstruction
+from pwndbg.emu.emulator import Emulator
+
+POWERPC_CONDITIONAL_BRANCHES = {
+    PPC_INS_BC,
+    PPC_INS_ALIAS_BT,
+    PPC_INS_ALIAS_BF,
+    PPC_INS_ALIAS_BTLR,
+    PPC_INS_ALIAS_BFLR,
+}
+
+POWERPC_RETURN_INSTRUCTION = {PPC_INS_ALIAS_BLR, PPC_INS_ALIAS_BTLR, PPC_INS_ALIAS_BFLR}
+
+
+# PowerPC branch instructions are pretty complex and whether or not the branch is taken depends on 3 factors:
+# 1. bi - index into cr register, a flags register with conditions (less than, greater than, equal, overflow)
+# 2. bo - a bitfield that modifies the evaluation of the condition
+# 3. CTR register - a register that, depending on bo, can be read and modified and affects the result of the branch
+def is_branch_taken(cr: int, ctr: int, bi: int, bo: int) -> bool | None:
+    # Valid values for bo (5 bit value): https://www.ibm.com/docs/en/aix/7.2.0?topic=set-bc-branch-conditional-instruction
+    # The `x` means it can be either 0 or 1, it is irrelevant to the branch condition (used to hint that the branch is or isn't taken)
+    # 0000x - Decrement CTR. Branch if CTR is not 0 and condition is false
+    # 0001x - Decrement CTR. Branch if CTR is 0 and condition is false
+    # 001xx - Branch if condition is false
+    # 0100x - Decrement CTR. Branch if CTR is not 0 and condition is true
+    # 0101x - Decrement CTR. Branch if CTR is 0 and condition is true
+    # 011xx - Branch if the condition is true.
+    # 1x00x - Decrement CTR. Branch if CTR is not 0.
+    # 1x01x - Decrement CTR. Branch if CTR is 0
+    # 1x1xx - Always branch
+
+    # GDB `cr` register consists of cr0 .... cr7, where cr0 composes the most-significant bit positions.
+    # This is why we flip the offset that we access.
+    check_cr_offset = 31 - bi
+    condition = ((cr >> check_cr_offset) & 1) == 1
+
+    if (bo & 0b11110) == 0b00000:  # 0000x
+        ctr -= 1
+        return ctr != 0 and not condition
+    elif (bo & 0b11110) == 0b00010:  # 0001x
+        ctr -= 1
+        return ctr == 0 and not condition
+    elif (bo & 0b11100) == 0b00100:  # 001xx
+        return not condition
+    elif (bo & 0b11110) == 0b01000:  # 0100x
+        ctr -= 1
+        return ctr != 0 and condition
+    elif (bo & 0b11110) == 0b01010:  # 0101x
+        ctr -= 1
+        return ctr == 0 and condition
+    elif (bo & 0b11100) == 0b01100:  # 011xx
+        return condition
+    elif (bo & 0b10110) == 0b10000:  # 1x00x
+        ctr -= 1
+        return ctr != 0
+    elif (bo & 0b10110) == 0b10010:  # 1x01x
+        ctr -= 1
+        return ctr == 0
+    elif (bo & 0b10100) == 0b10100:  # 1x1xx
+        return True
+
+    # This case should never be reached
+    return None
+
+
+class PowerPCDisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant):
+    saved_ctr: int | None = None
+
+    def __init__(self, architecture) -> None:
+        super().__init__(architecture)
+
+        self.annotation_handlers: Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = {}
+
+    @override
+    def _prepare(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
+        # Prepare is called before emulation.
+        # At this point, we want to read the value of the ctr register.
+        # This is because branch instructions might mutate ctr within the emulator, which the read_register_name may fetch from
+        # The _condition() function is called after emulation is stepped, so to read the original
+        # value of CTR, we have to read it beforehand.
+
+        if instruction.id in POWERPC_CONDITIONAL_BRANCHES:
+            self.saved_ctr = self._read_register_name(instruction, "ctr", emu)
+
+    @override
+    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
+        cr = self._read_register_name(instruction, "cr", emu)
+
+        if cr is None or self.saved_ctr is None:
+            # We can't reason about the values of the cr or ctr registers
+            return InstructionCondition.UNDETERMINED
+
+        if instruction.id in POWERPC_CONDITIONAL_BRANCHES:
+            is_taken = is_branch_taken(
+                cr, self.saved_ctr, instruction.cs_insn.bc.bi, instruction.cs_insn.bc.bo
+            )
+
+            if is_taken is None:
+                return InstructionCondition.UNDETERMINED
+
+            return InstructionCondition.TRUE if is_taken else InstructionCondition.FALSE
+
+        return InstructionCondition.UNDETERMINED
+
+    @override
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
+        if instruction.id in POWERPC_RETURN_INSTRUCTION:
+            return self._read_register_name(instruction, "lr", emu)
+
+        return super()._resolve_target(instruction, emu)
diff --git a/pwndbg/emu/emulator.py b/pwndbg/emu/emulator.py
index 9a8111a78..7ffb7200e 100644
--- a/pwndbg/emu/emulator.py
+++ b/pwndbg/emu/emulator.py
@@ -14,6 +14,7 @@ from typing import Tuple
 
 import capstone as C
 import unicorn as U
+import unicorn.ppc_const
 
 import pwndbg.aglib.arch
 import pwndbg.aglib.disasm.disassembly
@@ -33,6 +34,7 @@ import pwndbg.lib.regs
 from pwndbg import color
 from pwndbg.aglib.disasm.instruction import PwndbgInstruction
 from pwndbg.color.syntax_highlight import syntax_highlight
+from pwndbg.lib.arch import PWNDBG_SUPPORTED_ARCHITECTURES_TYPE
 
 if pwndbg.dbg.is_gdblib_available():
     import gdb
@@ -80,7 +82,7 @@ def create_reg_to_const_map(
 
 
 # Map our internal architecture names onto Unicorn Engine's architecture types.
-arch_to_UC = {
+arch_to_UC: Dict[PWNDBG_SUPPORTED_ARCHITECTURES_TYPE, int] = {
     "i386": U.UC_ARCH_X86,
     "x86-64": U.UC_ARCH_X86,
     "mips": U.UC_ARCH_MIPS,
@@ -88,14 +90,14 @@ arch_to_UC = {
     "arm": U.UC_ARCH_ARM,
     "armcm": U.UC_ARCH_ARM,
     "aarch64": U.UC_ARCH_ARM64,
-    # 'powerpc': U.UC_ARCH_PPC,
+    "powerpc": U.UC_ARCH_PPC,
     "rv32": U.UC_ARCH_RISCV,
     "rv64": U.UC_ARCH_RISCV,
     "s390x": U.UC_ARCH_S390X,
 }
 
 # Architecture specific maps: Map<"UC_*_REG_*",constant>
-arch_to_UC_consts = {
+arch_to_UC_consts: Dict[PWNDBG_SUPPORTED_ARCHITECTURES_TYPE, Dict[str, int]] = {
     "i386": parse_consts(U.x86_const),
     "x86-64": parse_consts(U.x86_const),
     "mips": parse_consts(U.mips_const),
@@ -103,13 +105,14 @@ arch_to_UC_consts = {
     "arm": parse_consts(U.arm_const),
     "armcm": parse_consts(U.arm_const),
     "aarch64": parse_consts(U.arm64_const),
+    "powerpc": parse_consts(unicorn.ppc_const),
     "rv32": parse_consts(U.riscv_const),
     "rv64": parse_consts(U.riscv_const),
     "s390x": parse_consts(U.s390x_const),
 }
 
 # Architecture specific maps: Map
-arch_to_reg_const_map = {
+arch_to_reg_const_map: Dict[PWNDBG_SUPPORTED_ARCHITECTURES_TYPE, Dict[str, int]] = {
     "i386": create_reg_to_const_map(arch_to_UC_consts["i386"]),
     "x86-64": create_reg_to_const_map(
         arch_to_UC_consts["x86-64"],
@@ -122,15 +125,50 @@ arch_to_reg_const_map = {
     "aarch64": create_reg_to_const_map(
         arch_to_UC_consts["aarch64"], {"CPSR": U.arm64_const.UC_ARM64_REG_NZCV}
     ),
+    "powerpc": create_reg_to_const_map(
+        arch_to_UC_consts["powerpc"],
+        {
+            "R0": unicorn.ppc_const.UC_PPC_REG_0,
+            "SP": unicorn.ppc_const.UC_PPC_REG_1,
+            "R2": unicorn.ppc_const.UC_PPC_REG_2,
+            "R3": unicorn.ppc_const.UC_PPC_REG_3,
+            "R4": unicorn.ppc_const.UC_PPC_REG_4,
+            "R5": unicorn.ppc_const.UC_PPC_REG_5,
+            "R6": unicorn.ppc_const.UC_PPC_REG_6,
+            "R7": unicorn.ppc_const.UC_PPC_REG_7,
+            "R8": unicorn.ppc_const.UC_PPC_REG_8,
+            "R9": unicorn.ppc_const.UC_PPC_REG_9,
+            "R10": unicorn.ppc_const.UC_PPC_REG_10,
+            "R11": unicorn.ppc_const.UC_PPC_REG_11,
+            "R12": unicorn.ppc_const.UC_PPC_REG_12,
+            "R13": unicorn.ppc_const.UC_PPC_REG_13,
+            "R14": unicorn.ppc_const.UC_PPC_REG_14,
+            "R15": unicorn.ppc_const.UC_PPC_REG_15,
+            "R16": unicorn.ppc_const.UC_PPC_REG_16,
+            "R17": unicorn.ppc_const.UC_PPC_REG_17,
+            "R18": unicorn.ppc_const.UC_PPC_REG_18,
+            "R19": unicorn.ppc_const.UC_PPC_REG_19,
+            "R20": unicorn.ppc_const.UC_PPC_REG_20,
+            "R21": unicorn.ppc_const.UC_PPC_REG_21,
+            "R22": unicorn.ppc_const.UC_PPC_REG_22,
+            "R23": unicorn.ppc_const.UC_PPC_REG_23,
+            "R24": unicorn.ppc_const.UC_PPC_REG_24,
+            "R25": unicorn.ppc_const.UC_PPC_REG_25,
+            "R26": unicorn.ppc_const.UC_PPC_REG_26,
+            "R27": unicorn.ppc_const.UC_PPC_REG_27,
+            "R28": unicorn.ppc_const.UC_PPC_REG_28,
+            "R29": unicorn.ppc_const.UC_PPC_REG_29,
+            "R30": unicorn.ppc_const.UC_PPC_REG_30,
+            "R31": unicorn.ppc_const.UC_PPC_REG_31,
+        },
+    ),
     "rv32": create_reg_to_const_map(arch_to_UC_consts["rv32"]),
     "rv64": create_reg_to_const_map(arch_to_UC_consts["rv64"]),
     "s390x": create_reg_to_const_map(arch_to_UC_consts["s390x"]),
 }
 
 # Architectures for which we want to enable virtual TLB mode
-enable_virtual_tlb = {
-    "s390x": True,
-}
+enable_virtual_tlb = {"s390x": True, "powerpc": True}
 
 # combine the flags with | operator. -1 for all
 (
diff --git a/tests/binaries/qemu_user/Makefile b/tests/binaries/qemu_user/Makefile
index 1775ae938..ef7e52803 100644
--- a/tests/binaries/qemu_user/Makefile
+++ b/tests/binaries/qemu_user/Makefile
@@ -14,6 +14,7 @@ CC.loongarch64 = ${ZIGCC}
 CC.s390x = ${ZIGCC}
 CC.powerpc32 = ${ZIGCC}
 CC.powerpc64 = ${ZIGCC}
+CC.powerpc64le = ${ZIGCC}
 CC.sparc64 = ${ZIGCC}
 
 ALL_FLAGS = -g
@@ -31,6 +32,7 @@ CFLAGS.loongarch64 = $(ALL_FLAGS) --target=loongarch64-linux-musl
 CFLAGS.s390x = $(ALL_FLAGS) --target=s390x-linux-musl -mcpu=z13
 CFLAGS.powerpc32 = $(ALL_FLAGS) --target=powerpc-linux-musl
 CFLAGS.powerpc64 = $(ALL_FLAGS) --target=powerpc64-linux-musl
+CFLAGS.powerpc64le = $(ALL_FLAGS) --target=powerpc64le-linux-musl
 
 
 AARCH64_SOURCES := $(wildcard *.aarch64.c)
@@ -70,7 +72,7 @@ SPARC64_SOURCES := $(wildcard *.sparc64.c)
 SPARC64_TARGETS := $(SPARC64_SOURCES:.sparc64.c=.sparc64.out)
 
 
-ARCHES_TO_COMPILE_BASIC = aarch64 arm riscv32 riscv64 mips32 mipsel32 mips64el loongarch64 s390x
+ARCHES_TO_COMPILE_BASIC = aarch64 arm riscv32 riscv64 mips32 mipsel32 mips64el loongarch64 s390x powerpc64le
 # Build basic.c (which relies on libc) for these architectures
 BASIC_C_TARGETS = $(ARCHES_TO_COMPILE_BASIC:%=basic.%.out)
 basic.%.out: basic.c
diff --git a/tests/library/qemu_user/conftest.py b/tests/library/qemu_user/conftest.py
index 60ae1cc4a..1a9dbb406 100644
--- a/tests/library/qemu_user/conftest.py
+++ b/tests/library/qemu_user/conftest.py
@@ -30,6 +30,7 @@ COMPILATION_TARGETS_TYPE = Literal[
     "loongarch64",
     "powerpc32",
     "powerpc64",
+    "powerpc64le",
     "mips32",
     "mipsel32",
     "mips64el",
@@ -56,6 +57,7 @@ COMPILE_AND_RUN_INFO: Dict[COMPILATION_TARGETS_TYPE, Tuple[str, Tuple[str, ...],
     "sparc64": ("sparc64-freestanding", (), "sparc64"),
     "powerpc32": ("powerpc-freestanding", (), "ppc"),
     "powerpc64": ("powerpc64-freestanding", (), "ppc64"),
+    "powerpc64le": ("powerpc64le-freestanding", (), "ppc64le"),
 }
 
 
diff --git a/tests/library/qemu_user/tests/test_basic.py b/tests/library/qemu_user/tests/test_basic.py
index 7493c1a28..58ebee348 100644
--- a/tests/library/qemu_user/tests/test_basic.py
+++ b/tests/library/qemu_user/tests/test_basic.py
@@ -65,3 +65,7 @@ def test_basic_s390x(qemu_start_binary):
 
 def test_basic_loongarch64(qemu_start_binary):
     helper(qemu_start_binary, "basic.loongarch64.out", "loongarch64")
+
+
+def test_basic_powerpc64le(qemu_start_binary):
+    helper(qemu_start_binary, "basic.powerpc64le.out", "powerpc64le")