Add capstone for loongarch64 (#2885)

* init loongarch64 capstone * loongarch64 fix resolving symbols Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com> * disasm display arch name in instruction repr * fix resolve target for `ALIAS_RET` instruction Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com> * fix reviewdog * rebase with dev branch * fix lint * fix lint * fix lint * fix typos --------- Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com>
8 months ago · 75debdef36
parent d2efb84d6a
commit 75debdef36
3 changed files with 119 additions and 0 deletions
--- a/pwndbg/aglib/disasm/disassembly.py
+++ b/pwndbg/aglib/disasm/disassembly.py
@ -21,6 +21,7 @@ import pwndbg.aglib.arch
 import pwndbg.aglib.disasm.aarch64
 import pwndbg.aglib.disasm.arm
 import pwndbg.aglib.disasm.disassembly
+import pwndbg.aglib.disasm.loongarch64
 import pwndbg.aglib.disasm.mips
 import pwndbg.aglib.disasm.riscv
 import pwndbg.aglib.disasm.x86
@ -54,6 +55,7 @@ CapstoneArch = {
    "rv32": CS_ARCH_RISCV,
    "rv64": CS_ARCH_RISCV,
    "s390x": CS_ARCH_SYSTEMZ,
+    "loongarch64": CS_ARCH_LOONGARCH,
 }

 CapstoneEndian = {
@ -78,6 +80,7 @@ VariableInstructionSizeMax = {
    "rv32": 22,
    "rv64": 22,
    "s390x": 6,
+    "loongarch64": 4,
 }


@ -192,6 +195,8 @@ def get_disassembler(address):
    elif pwndbg.aglib.arch.name == "s390x":
        # The ptrsize base modes cause capstone.CsError: Invalid mode (CS_ERR_MODE)
        extra = 0
+    elif pwndbg.aglib.arch.name == "loongarch64":
+        extra = CS_MODE_LOONGARCH64
    else:
        extra = None

@ -549,6 +554,9 @@ ALL_DISASSEMBLY_ASSISTANTS: Dict[
    "mips": lambda: pwndbg.aglib.disasm.mips.MipsDisassemblyAssistant("mips"),
    "rv32": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv32"),
    "rv64": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv64"),
+    "loongarch64": lambda: pwndbg.aglib.disasm.loongarch64.Loong64DisassemblyAssistant(
+        "loongarch64"
+    ),
 }


--- a/pwndbg/aglib/disasm/instruction.py
+++ b/pwndbg/aglib/disasm/instruction.py
@ -18,6 +18,10 @@ from capstone.aarch64 import AARCH64_INS_BLR
 from capstone.aarch64 import AARCH64_INS_BR
 from capstone.arm import ARM_INS_TBB
 from capstone.arm import ARM_INS_TBH
+from capstone.loongarch import LOONGARCH_INS_ALIAS_JR
+from capstone.loongarch import LOONGARCH_INS_B
+from capstone.loongarch import LOONGARCH_INS_BL
+from capstone.loongarch import LOONGARCH_INS_JIRL
 from capstone.mips import MIPS_INS_ALIAS_B
 from capstone.mips import MIPS_INS_ALIAS_BAL
 from capstone.mips import MIPS_INS_B
@ -82,6 +86,12 @@ UNCONDITIONAL_JUMP_INSTRUCTIONS: Dict[int, Set[int]] = {
    },
    CS_ARCH_PPC: {PPC_INS_B, PPC_INS_BA, PPC_INS_BL, PPC_INS_BLA},
    CS_ARCH_SYSTEMZ: {SYSTEMZ_INS_B, SYSTEMZ_INS_BAL, SYSTEMZ_INS_BALR},
+    CS_ARCH_LOONGARCH: {
+        LOONGARCH_INS_B,
+        LOONGARCH_INS_BL,
+        LOONGARCH_INS_JIRL,
+        LOONGARCH_INS_ALIAS_JR,
+    },
 }

 # See: https://github.com/capstone-engine/capstone/issues/2448
@ -132,6 +142,8 @@ CAPSTONE_ARCH_MAPPING_STRING = {
    CS_ARCH_MIPS: "mips",
    CS_ARCH_SPARC: "sparc",
    CS_ARCH_RISCV: "RISCV",
+    CS_ARCH_SYSTEMZ: "s390x",
+    CS_ARCH_LOONGARCH: "loongarch",
 }


--- a/pwndbg/aglib/disasm/loongarch64.py
+++ b/pwndbg/aglib/disasm/loongarch64.py
@ -0,0 +1,99 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing import Callable
+from typing import Dict
+from typing import List
+
+from capstone import *  # noqa: F403
+from capstone.loongarch import *  # noqa: F403
+from typing_extensions import override
+
+import pwndbg.aglib.disasm.arch
+import pwndbg.lib.disasm.helpers as bit_math
+from pwndbg.aglib.disasm.instruction import InstructionCondition
+from pwndbg.aglib.disasm.instruction import PwndbgInstruction
+
+# Emulator currently requires GDB, and we only use it here for type checking.
+if TYPE_CHECKING:
+    from pwndbg.emu.emulator import Emulator
+
+def _signed_ptr(value: int) -> int:
+    """Pass `value` through `bit_math.to_signed` using the current pointer width in bits."""
+    return bit_math.to_signed(value, pwndbg.aglib.arch.ptrsize * 8)
+
+
+# Maps a conditional-branch instruction id to a predicate over the instruction's
+# resolved operand values. Each predicate only inspects the leading operand(s);
+# BLT/BGE convert both sides to signed values first, BLTU/BGEU compare the raw values.
+CONDITION_RESOLVERS: Dict[int, Callable[[List[int]], bool]] = {
+    LOONGARCH_INS_BEQZ: lambda ops: ops[0] == 0,
+    LOONGARCH_INS_BNEZ: lambda ops: ops[0] != 0,
+    LOONGARCH_INS_BEQ: lambda ops: ops[0] == ops[1],
+    LOONGARCH_INS_BNE: lambda ops: ops[0] != ops[1],
+    LOONGARCH_INS_BGE: lambda ops: _signed_ptr(ops[0]) >= _signed_ptr(ops[1]),
+    LOONGARCH_INS_BLT: lambda ops: _signed_ptr(ops[0]) < _signed_ptr(ops[1]),
+    LOONGARCH_INS_BLTU: lambda ops: ops[0] < ops[1],
+    LOONGARCH_INS_BGEU: lambda ops: ops[0] >= ops[1],
+}
+
+
+# The three tables below are empty and are not read anywhere in this module.
+# NOTE(review): presumably placeholders for load/store/binary-operation annotation
+# support - confirm the intended key/value meaning before filling them in.
+LOONGARCH_LOAD_INSTRUCTIONS: Dict[int, int] = {}
+
+LOONGARCH_STORE_INSTRUCTIONS: Dict[int, int] = {}
+
+LOONGARCH_BINARY_OPERATIONS: Dict[int, str] = {}
+
+
+class Loong64DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant):
+    """
+    Disassembly assistant that enhances 64-bit LoongArch instructions.
+
+    Overrides branch-condition evaluation (`_condition`) and branch-target
+    resolution (`_resolve_target`); anything not handled here is delegated to
+    the parent class.
+    """
+
+    def __init__(self, architecture) -> None:
+        super().__init__(architecture)
+
+        # No annotation handlers are registered for LoongArch in this module.
+        self.annotation_handlers: Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = {}
+
+    @override
+    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
+        """
+        Evaluate the branch condition of `instruction`.
+
+        Returns `InstructionCondition.UNDETERMINED` when the instruction has no
+        operands, when any operand other than the last resolves to `None`, or
+        when `CONDITION_RESOLVERS` has no entry for the instruction id.
+        Otherwise returns `TRUE` or `FALSE` according to the resolver's result.
+        """
+        operands = instruction.operands
+        if not operands:
+            return InstructionCondition.UNDETERMINED
+
+        # super() is looked up here, outside the comprehension, because comprehensions
+        # run in a separate scope in which super() does not exist.
+        resolve = super()._resolve_used_value
+        values: List[int] = [resolve(op.before_value, instruction, op, emu) for op in operands]
+
+        # LoongArch jump instructions have the target as the last operand, so only the
+        # operands before it are relevant; if we can't reason about one of them, quit.
+        # https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_beqz_bnez
+        for value in values[:-1]:
+            if value is None:
+                return InstructionCondition.UNDETERMINED
+
+        resolver = CONDITION_RESOLVERS.get(instruction.id)
+        outcome = None if resolver is None else resolver(values)
+
+        if outcome is None:
+            return InstructionCondition.UNDETERMINED
+        if outcome:
+            return InstructionCondition.TRUE
+        return InstructionCondition.FALSE
+
+    @override
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
+        """
+        Compute the address `instruction` jumps to.
+
+        Handles the `ret` alias (reads the `ra` register), the relative branches
+        (instruction address plus the last operand's value) and `jirl`; every
+        other case falls through to the parent implementation.
+        """
+        insn_id = instruction.id
+
+        if insn_id == LOONGARCH_INS_ALIAS_RET:
+            return self._read_register_name(instruction, "ra", emu)
+
+        # Branches whose target is computed manually as an offset from the instruction address
+        relative_branches = (
+            LOONGARCH_INS_B,
+            LOONGARCH_INS_BL,
+            LOONGARCH_INS_BEQ,
+            LOONGARCH_INS_BNE,
+            LOONGARCH_INS_BLT,
+            LOONGARCH_INS_BGE,
+            LOONGARCH_INS_BGEU,
+            LOONGARCH_INS_BLTU,
+            LOONGARCH_INS_BEQZ,
+            LOONGARCH_INS_BNEZ,
+        )
+        if insn_id in relative_branches:
+            # The relative offset is always the last operand
+            offset = instruction.operands[-1].before_value
+            return instruction.address + offset
+
+        if insn_id == LOONGARCH_INS_JIRL:
+            # Target = value of operand 1 plus operand 2's value shifted left by two bits.
+            # Only computed when operand 1's value is known.
+            base = instruction.operands[1].before_value
+            if base is not None:
+                return base + (instruction.operands[2].before_value << 2)
+
+        return super()._resolve_target(instruction, emu)