From 75debdef36bd748aa41c1fe4765c77a3efd5951d Mon Sep 17 00:00:00 2001 From: patryk4815 Date: Thu, 17 Apr 2025 21:14:39 +0200 Subject: [PATCH] Add capstone for loongarch64 (#2885) * init loongarch64 capstone * loongarch64 fix resolving symbols Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com> * disasm display arch name in instruction repr * fix resolve target for `ALIAS_RET` instruction Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com> * fix reviewdog * rebase with dev branch * fix lint * fix lint * fix lint * fix typos --------- Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com> --- pwndbg/aglib/disasm/disassembly.py | 8 +++ pwndbg/aglib/disasm/instruction.py | 12 ++++ pwndbg/aglib/disasm/loongarch64.py | 99 ++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+) create mode 100644 pwndbg/aglib/disasm/loongarch64.py diff --git a/pwndbg/aglib/disasm/disassembly.py b/pwndbg/aglib/disasm/disassembly.py index b850c71ca..9c59bd73e 100644 --- a/pwndbg/aglib/disasm/disassembly.py +++ b/pwndbg/aglib/disasm/disassembly.py @@ -21,6 +21,7 @@ import pwndbg.aglib.arch import pwndbg.aglib.disasm.aarch64 import pwndbg.aglib.disasm.arm import pwndbg.aglib.disasm.disassembly +import pwndbg.aglib.disasm.loongarch64 import pwndbg.aglib.disasm.mips import pwndbg.aglib.disasm.riscv import pwndbg.aglib.disasm.x86 @@ -54,6 +55,7 @@ CapstoneArch = { "rv32": CS_ARCH_RISCV, "rv64": CS_ARCH_RISCV, "s390x": CS_ARCH_SYSTEMZ, + "loongarch64": CS_ARCH_LOONGARCH, } CapstoneEndian = { @@ -78,6 +80,7 @@ VariableInstructionSizeMax = { "rv32": 22, "rv64": 22, "s390x": 6, + "loongarch64": 4, } @@ -192,6 +195,8 @@ def get_disassembler(address): elif pwndbg.aglib.arch.name == "s390x": # The ptrsize base modes cause capstone.CsError: Invalid mode (CS_ERR_MODE) extra = 0 + elif pwndbg.aglib.arch.name == "loongarch64": + extra = CS_MODE_LOONGARCH64 else: extra = None @@ -549,6 +554,9 @@ ALL_DISASSEMBLY_ASSISTANTS: Dict[ "mips": 
lambda: pwndbg.aglib.disasm.mips.MipsDisassemblyAssistant("mips"), "rv32": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv32"), "rv64": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv64"), + "loongarch64": lambda: pwndbg.aglib.disasm.loongarch64.Loong64DisassemblyAssistant( + "loongarch64" + ), } diff --git a/pwndbg/aglib/disasm/instruction.py b/pwndbg/aglib/disasm/instruction.py index 8f74537b0..bee00870a 100644 --- a/pwndbg/aglib/disasm/instruction.py +++ b/pwndbg/aglib/disasm/instruction.py @@ -18,6 +18,10 @@ from capstone.aarch64 import AARCH64_INS_BLR from capstone.aarch64 import AARCH64_INS_BR from capstone.arm import ARM_INS_TBB from capstone.arm import ARM_INS_TBH +from capstone.loongarch import LOONGARCH_INS_ALIAS_JR +from capstone.loongarch import LOONGARCH_INS_B +from capstone.loongarch import LOONGARCH_INS_BL +from capstone.loongarch import LOONGARCH_INS_JIRL from capstone.mips import MIPS_INS_ALIAS_B from capstone.mips import MIPS_INS_ALIAS_BAL from capstone.mips import MIPS_INS_B @@ -82,6 +86,12 @@ UNCONDITIONAL_JUMP_INSTRUCTIONS: Dict[int, Set[int]] = { }, CS_ARCH_PPC: {PPC_INS_B, PPC_INS_BA, PPC_INS_BL, PPC_INS_BLA}, CS_ARCH_SYSTEMZ: {SYSTEMZ_INS_B, SYSTEMZ_INS_BAL, SYSTEMZ_INS_BALR}, + CS_ARCH_LOONGARCH: { + LOONGARCH_INS_B, + LOONGARCH_INS_BL, + LOONGARCH_INS_JIRL, + LOONGARCH_INS_ALIAS_JR, + }, } # See: https://github.com/capstone-engine/capstone/issues/2448 @@ -132,6 +142,8 @@ CAPSTONE_ARCH_MAPPING_STRING = { CS_ARCH_MIPS: "mips", CS_ARCH_SPARC: "sparc", CS_ARCH_RISCV: "RISCV", + CS_ARCH_SYSTEMZ: "s390x", + CS_ARCH_LOONGARCH: "loongarch", } diff --git a/pwndbg/aglib/disasm/loongarch64.py b/pwndbg/aglib/disasm/loongarch64.py new file mode 100644 index 000000000..3615da5a1 --- /dev/null +++ b/pwndbg/aglib/disasm/loongarch64.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing import Callable +from typing import Dict +from typing import List + +from capstone import 
*  # noqa: F403
+from capstone.loongarch import *  # noqa: F403
+from typing_extensions import override
+
+import pwndbg.aglib.disasm.arch
+import pwndbg.lib.disasm.helpers as bit_math
+from pwndbg.aglib.disasm.instruction import InstructionCondition
+from pwndbg.aglib.disasm.instruction import PwndbgInstruction
+
+# Emulator currently requires GDB, and we only use it here for type checking.
+if TYPE_CHECKING:
+    from pwndbg.emu.emulator import Emulator
+
+# Maps a conditional-branch instruction id to a predicate over the instruction's
+# resolved operand values. The predicate returns True when the branch is taken.
+# BGE/BLT reinterpret both operands as signed integers of pointer width before
+# comparing; every other entry compares the raw values.
+CONDITION_RESOLVERS: Dict[int, Callable[[List[int]], bool]] = {
+    LOONGARCH_INS_BEQZ: lambda ops: ops[0] == 0,
+    LOONGARCH_INS_BNEZ: lambda ops: ops[0] != 0,
+    LOONGARCH_INS_BEQ: lambda ops: ops[0] == ops[1],
+    LOONGARCH_INS_BNE: lambda ops: ops[0] != ops[1],
+    LOONGARCH_INS_BGE: lambda ops: bit_math.to_signed(ops[0], pwndbg.aglib.arch.ptrsize * 8)
+    >= bit_math.to_signed(ops[1], pwndbg.aglib.arch.ptrsize * 8),
+    LOONGARCH_INS_BLT: lambda ops: bit_math.to_signed(ops[0], pwndbg.aglib.arch.ptrsize * 8)
+    < bit_math.to_signed(ops[1], pwndbg.aglib.arch.ptrsize * 8),
+    LOONGARCH_INS_BLTU: lambda ops: ops[0] < ops[1],
+    LOONGARCH_INS_BGEU: lambda ops: ops[0] >= ops[1],
+}
+
+
+# The three tables below are intentionally empty placeholders in this patch.
+LOONGARCH_LOAD_INSTRUCTIONS: Dict[int, int] = {}
+
+LOONGARCH_STORE_INSTRUCTIONS: Dict[int, int] = {}
+
+LOONGARCH_BINARY_OPERATIONS: Dict[int, str] = {}
+
+
+# This class enhances 64-bit Loongarch
+class Loong64DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant):
+    """Disassembly assistant for LoongArch64.
+
+    Evaluates conditional branches through ``CONDITION_RESOLVERS`` and computes
+    jump targets for ``ret``, the relative branches and ``jirl``.
+    """
+
+    def __init__(self, architecture) -> None:
+        """Initialise the base assistant for ``architecture``; no annotation handlers yet."""
+        super().__init__(architecture)
+
+        self.annotation_handlers: Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = {}
+
+    @override
+    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
+        """Decide whether a conditional branch is taken.
+
+        Returns ``UNDETERMINED`` when the instruction has no operands, has no
+        entry in ``CONDITION_RESOLVERS``, or when any operand other than the
+        last one could not be resolved. Otherwise returns ``TRUE`` or ``FALSE``
+        according to the resolver.
+        """
+        if len(instruction.operands) == 0:
+            return InstructionCondition.UNDETERMINED
+
+        # Look the resolver up first so that instructions which are not
+        # conditional branches do not pay for operand resolution.
+        resolver = CONDITION_RESOLVERS.get(instruction.id)
+        if resolver is None:
+            return InstructionCondition.UNDETERMINED
+
+        # Not using list comprehension because they run in a separate scope in which super() does not exist
+        resolved_operands: List[int] = []
+        for op in instruction.operands:
+            resolved_operands.append(
+                super()._resolve_used_value(op.before_value, instruction, op, emu)
+            )
+
+        # If any of the relevant operands are None (we can't reason about them), quit.
+        if any(value is None for value in resolved_operands[:-1]):
+            # Note the [:-1]. Loongarch jump instructions have the target as the last operand
+            # https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_beqz_bnez
+            return InstructionCondition.UNDETERMINED
+
+        if resolver(resolved_operands):
+            return InstructionCondition.TRUE
+        return InstructionCondition.FALSE
+
+    @override
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
+        """Compute the address ``instruction`` jumps to.
+
+        ``ret`` reads the ``ra`` register, relative branches add their last
+        operand to the instruction address, and ``jirl`` adds the shifted
+        immediate to the base register value. Anything else, or any case where
+        a needed operand value is unknown, is delegated to the parent class.
+        """
+        if instruction.id == LOONGARCH_INS_ALIAS_RET:
+            return self._read_register_name(instruction, "ra", emu)
+
+        # Manually compute target addresses for relative branches
+        if instruction.id in (
+            LOONGARCH_INS_B,
+            LOONGARCH_INS_BL,
+            LOONGARCH_INS_BEQ,
+            LOONGARCH_INS_BNE,
+            LOONGARCH_INS_BLT,
+            LOONGARCH_INS_BGE,
+            LOONGARCH_INS_BGEU,
+            LOONGARCH_INS_BLTU,
+            LOONGARCH_INS_BEQZ,
+            LOONGARCH_INS_BNEZ,
+        ):
+            # The relative offset is always the last operand
+            offset = instruction.operands[-1].before_value
+            # An unresolved offset used to raise TypeError; fall through instead.
+            if offset is not None:
+                return instruction.address + offset
+
+        elif instruction.id == LOONGARCH_INS_JIRL and len(instruction.operands) >= 3:
+            base = instruction.operands[1].before_value
+            immediate = instruction.operands[2].before_value
+            # Both the base register value and the immediate must be known.
+            if base is not None and immediate is not None:
+                return base + (immediate << 2)
+
+        return super()._resolve_target(instruction, emu)