From 75debdef36bd748aa41c1fe4765c77a3efd5951d Mon Sep 17 00:00:00 2001
From: patryk4815 <patryk.sondej@grupawp.pl>
Date: Thu, 17 Apr 2025 21:14:39 +0200
Subject: [PATCH] Add capstone for loongarch64 (#2885)

* init loongarch64 capstone

* loongarch64 fix resolving symbols

Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com>

* disasm display arch name in instruction repr

* fix resolve target for `ALIAS_RET` instruction

Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com>

* fix reviewdog

* rebase with dev branch

* fix lint

* fix lint

* fix lint

* fix typos

---------

Co-authored-by: OBarronCS <55004530+OBarronCS@users.noreply.github.com>
---
 pwndbg/aglib/disasm/disassembly.py |  8 +++
 pwndbg/aglib/disasm/instruction.py | 12 ++++
 pwndbg/aglib/disasm/loongarch64.py | 99 ++++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+)
 create mode 100644 pwndbg/aglib/disasm/loongarch64.py

diff --git a/pwndbg/aglib/disasm/disassembly.py b/pwndbg/aglib/disasm/disassembly.py
index b850c71ca..9c59bd73e 100644
--- a/pwndbg/aglib/disasm/disassembly.py
+++ b/pwndbg/aglib/disasm/disassembly.py
@@ -21,6 +21,7 @@ import pwndbg.aglib.arch
 import pwndbg.aglib.disasm.aarch64
 import pwndbg.aglib.disasm.arm
 import pwndbg.aglib.disasm.disassembly
+import pwndbg.aglib.disasm.loongarch64
 import pwndbg.aglib.disasm.mips
 import pwndbg.aglib.disasm.riscv
 import pwndbg.aglib.disasm.x86
@@ -54,6 +55,7 @@ CapstoneArch = {
     "rv32": CS_ARCH_RISCV,
     "rv64": CS_ARCH_RISCV,
     "s390x": CS_ARCH_SYSTEMZ,
+    "loongarch64": CS_ARCH_LOONGARCH,
 }
 
 CapstoneEndian = {
@@ -78,6 +80,7 @@ VariableInstructionSizeMax = {
     "rv32": 22,
     "rv64": 22,
     "s390x": 6,
+    "loongarch64": 4,
 }
 
 
@@ -192,6 +195,8 @@ def get_disassembler(address):
     elif pwndbg.aglib.arch.name == "s390x":
         # The ptrsize base modes cause capstone.CsError: Invalid mode (CS_ERR_MODE)
         extra = 0
+    elif pwndbg.aglib.arch.name == "loongarch64":
+        extra = CS_MODE_LOONGARCH64
     else:
         extra = None
 
@@ -549,6 +554,9 @@ ALL_DISASSEMBLY_ASSISTANTS: Dict[
     "mips": lambda: pwndbg.aglib.disasm.mips.MipsDisassemblyAssistant("mips"),
     "rv32": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv32"),
     "rv64": lambda: pwndbg.aglib.disasm.riscv.RISCVDisassemblyAssistant("rv64"),
+    "loongarch64": lambda: pwndbg.aglib.disasm.loongarch64.Loong64DisassemblyAssistant(
+        "loongarch64"
+    ),
 }
 
 
diff --git a/pwndbg/aglib/disasm/instruction.py b/pwndbg/aglib/disasm/instruction.py
index 8f74537b0..bee00870a 100644
--- a/pwndbg/aglib/disasm/instruction.py
+++ b/pwndbg/aglib/disasm/instruction.py
@@ -18,6 +18,10 @@ from capstone.aarch64 import AARCH64_INS_BLR
 from capstone.aarch64 import AARCH64_INS_BR
 from capstone.arm import ARM_INS_TBB
 from capstone.arm import ARM_INS_TBH
+from capstone.loongarch import LOONGARCH_INS_ALIAS_JR
+from capstone.loongarch import LOONGARCH_INS_B
+from capstone.loongarch import LOONGARCH_INS_BL
+from capstone.loongarch import LOONGARCH_INS_JIRL
 from capstone.mips import MIPS_INS_ALIAS_B
 from capstone.mips import MIPS_INS_ALIAS_BAL
 from capstone.mips import MIPS_INS_B
@@ -82,6 +86,12 @@ UNCONDITIONAL_JUMP_INSTRUCTIONS: Dict[int, Set[int]] = {
     },
     CS_ARCH_PPC: {PPC_INS_B, PPC_INS_BA, PPC_INS_BL, PPC_INS_BLA},
     CS_ARCH_SYSTEMZ: {SYSTEMZ_INS_B, SYSTEMZ_INS_BAL, SYSTEMZ_INS_BALR},
+    CS_ARCH_LOONGARCH: {
+        LOONGARCH_INS_B,
+        LOONGARCH_INS_BL,
+        LOONGARCH_INS_JIRL,
+        LOONGARCH_INS_ALIAS_JR,
+    },
 }
 
 # See: https://github.com/capstone-engine/capstone/issues/2448
@@ -132,6 +142,8 @@ CAPSTONE_ARCH_MAPPING_STRING = {
     CS_ARCH_MIPS: "mips",
     CS_ARCH_SPARC: "sparc",
     CS_ARCH_RISCV: "RISCV",
+    CS_ARCH_SYSTEMZ: "s390x",
+    CS_ARCH_LOONGARCH: "loongarch",
 }
 
 
diff --git a/pwndbg/aglib/disasm/loongarch64.py b/pwndbg/aglib/disasm/loongarch64.py
new file mode 100644
index 000000000..3615da5a1
--- /dev/null
+++ b/pwndbg/aglib/disasm/loongarch64.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing import Callable
+from typing import Dict
+from typing import List
+
+from capstone import *  # noqa: F403
+from capstone.loongarch import *  # noqa: F403
+from typing_extensions import override
+
+import pwndbg.aglib.disasm.arch
+import pwndbg.lib.disasm.helpers as bit_math
+from pwndbg.aglib.disasm.instruction import InstructionCondition
+from pwndbg.aglib.disasm.instruction import PwndbgInstruction
+
+# Emulator currently requires GDB, and we only use it here for type checking.
+if TYPE_CHECKING:
+    from pwndbg.emu.emulator import Emulator
+
+CONDITION_RESOLVERS: Dict[int, Callable[[List[int]], bool]] = {
+    LOONGARCH_INS_BEQZ: lambda ops: ops[0] == 0,
+    LOONGARCH_INS_BNEZ: lambda ops: ops[0] != 0,
+    LOONGARCH_INS_BEQ: lambda ops: ops[0] == ops[1],
+    LOONGARCH_INS_BNE: lambda ops: ops[0] != ops[1],
+    LOONGARCH_INS_BGE: lambda ops: bit_math.to_signed(ops[0], pwndbg.aglib.arch.ptrsize * 8)
+    >= bit_math.to_signed(ops[1], pwndbg.aglib.arch.ptrsize * 8),
+    LOONGARCH_INS_BLT: lambda ops: bit_math.to_signed(ops[0], pwndbg.aglib.arch.ptrsize * 8)
+    < bit_math.to_signed(ops[1], pwndbg.aglib.arch.ptrsize * 8),
+    LOONGARCH_INS_BLTU: lambda ops: ops[0] < ops[1],
+    LOONGARCH_INS_BGEU: lambda ops: ops[0] >= ops[1],
+}
+
+
+LOONGARCH_LOAD_INSTRUCTIONS: Dict[int, int] = {}
+
+LOONGARCH_STORE_INSTRUCTIONS: Dict[int, int] = {}
+
+LOONGARCH_BINARY_OPERATIONS: Dict[int, str] = {}
+
+
+# This class enhances 64-bit LoongArch
+class Loong64DisassemblyAssistant(pwndbg.aglib.disasm.arch.DisassemblyAssistant):
+    def __init__(self, architecture) -> None:
+        super().__init__(architecture)
+
+        self.annotation_handlers: Dict[int, Callable[[PwndbgInstruction, Emulator], None]] = {}
+
+    @override
+    def _condition(self, instruction: PwndbgInstruction, emu: Emulator) -> InstructionCondition:
+        if len(instruction.operands) == 0:
+            return InstructionCondition.UNDETERMINED
+
+        # Not using a list comprehension because it runs in a separate scope in which super() does not exist
+        resolved_operands: List[int] = []
+        for op in instruction.operands:
+            resolved_operands.append(
+                super()._resolve_used_value(op.before_value, instruction, op, emu)
+            )
+
+        # If any of the relevant operands are None (we can't reason about them), quit.
+        if any(value is None for value in resolved_operands[:-1]):
+            # Note the [:-1]. LoongArch jump instructions have the target as the last operand
+            # https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_beqz_bnez
+            return InstructionCondition.UNDETERMINED
+
+        conditional = CONDITION_RESOLVERS.get(instruction.id, lambda *a: None)(resolved_operands)
+
+        if conditional is None:
+            return InstructionCondition.UNDETERMINED
+
+        return InstructionCondition.TRUE if conditional else InstructionCondition.FALSE
+
+    @override
+    def _resolve_target(self, instruction: PwndbgInstruction, emu: Emulator | None):
+        if instruction.id == LOONGARCH_INS_ALIAS_RET:
+            return self._read_register_name(instruction, "ra", emu)
+
+        # Manually compute target addresses for relative branches
+        if instruction.id in (
+            LOONGARCH_INS_B,
+            LOONGARCH_INS_BL,
+            LOONGARCH_INS_BEQ,
+            LOONGARCH_INS_BNE,
+            LOONGARCH_INS_BLT,
+            LOONGARCH_INS_BGE,
+            LOONGARCH_INS_BGEU,
+            LOONGARCH_INS_BLTU,
+            LOONGARCH_INS_BEQZ,
+            LOONGARCH_INS_BNEZ,
+        ):
+            # The relative offset is always the last operand
+            return instruction.address + instruction.operands[-1].before_value
+
+        if instruction.id == LOONGARCH_INS_JIRL:
+            if (offset_reg := instruction.operands[1].before_value) is not None:
+                return offset_reg + (instruction.operands[2].before_value << 2)
+
+        return super()._resolve_target(instruction, emu)