Add initial support for Loongarch64 (#2691)

* Support for loongarch64. Add Loongarch64 register set definition, make a ManualPwndbgInstruction to define an instruction without a Capstone object underlying it, and detect Loongarch64 on GDB
* Update pwndbg/aglib/disasm/instruction.py
* Use Python Protocols to create an interface that the two PwndbgInstruction implementations conform to + make suggested changes

---------

Co-authored-by: Disconnect3d <dominik.b.czarnota@gmail.com>
11 months ago · 1f9ec9631e
parent fea2f92ae5
commit 1f9ec9631e
5 changed files with 212 additions and 33 deletions
--- a/pwndbg/aglib/arch.py
+++ b/pwndbg/aglib/arch.py
@@ -7,6 +7,7 @@ import pwnlib
 import pwndbg
 from pwndbg.lib.arch import Arch

+# List of architectures - used when determining GDB arch, and in OnlyWithArch
 ARCHS = (
    "x86-64",
    "i386",
@@ -19,10 +20,11 @@ ARCHS = (
    "riscv:rv32",
    "riscv:rv64",
    "riscv",
+    "loongarch64",
 )


-# mapping between gdb and pwntools arch names
+# mapping between pwndbg and pwntools arch names
 pwnlib_archs_mapping = {
    "x86-64": "amd64",
    "i386": "i386",
@@ -35,6 +37,7 @@ pwnlib_archs_mapping = {
    "armcm": "thumb",
    "rv32": "riscv32",
    "rv64": "riscv64",
+    "loongarch64": "none",
 }


--- a/pwndbg/aglib/disasm/__init__.py
+++ b/pwndbg/aglib/disasm/__init__.py
@@ -23,10 +23,10 @@ import pwndbg.aglib.memory
 import pwndbg.emu.emulator
 import pwndbg.lib.cache
 from pwndbg.aglib.disasm.arch import DEBUG_ENHANCEMENT
-from pwndbg.aglib.disasm.instruction import ALL_JUMP_GROUPS
+from pwndbg.aglib.disasm.instruction import ManualPwndbgInstruction
 from pwndbg.aglib.disasm.instruction import PwndbgInstruction
+from pwndbg.aglib.disasm.instruction import PwndbgInstructionImpl
 from pwndbg.aglib.disasm.instruction import SplitType
-from pwndbg.aglib.disasm.instruction import make_simple_instruction
 from pwndbg.color import message
 from pwndbg.dbg import EventType

@@ -206,13 +206,13 @@ def get_one_instruction(
            return cached

    if pwndbg.aglib.arch.current not in CapstoneArch:
-        return make_simple_instruction(address)
+        return ManualPwndbgInstruction(address)

    md = get_disassembler(address)
    size = VariableInstructionSizeMax.get(pwndbg.aglib.arch.current, 4)
    data = pwndbg.aglib.memory.read(address, size, partial=True)
    for ins in md.disasm(bytes(data), address, 1):
-        pwn_ins = PwndbgInstruction(ins)
+        pwn_ins: PwndbgInstruction = PwndbgInstructionImpl(ins)

        if enhance:
            pwndbg.aglib.disasm.arch.DisassemblyAssistant.enhance(pwn_ins, emu)
--- a/pwndbg/aglib/disasm/instruction.py
+++ b/pwndbg/aglib/disasm/instruction.py
@@ -5,6 +5,7 @@ from collections import defaultdict
 from enum import Enum
 from typing import Dict
 from typing import List
+from typing import Protocol
 from typing import Set

 # Reverse lookup tables for debug printing
@@ -40,6 +41,10 @@ from capstone.sparc import SPARC_INS_JMP
 from capstone.sparc import SPARC_INS_JMPL
 from capstone.x86 import X86_INS_JMP
 from capstone.x86 import X86Op
+from typing_extensions import override
+
+import pwndbg.dbg
+from pwndbg.dbg import DisassembledInstruction

 # Architecture specific instructions that mutate the instruction pointer unconditionally
 # The Capstone RET and CALL groups are also used to filter CALL and RET types when we check for unconditional jumps,
@@ -115,22 +120,71 @@ CAPSTONE_ARCH_MAPPING_STRING = {
 }


+# Interface for enhanced instructions - there are two implementations defined in this file
+class PwndbgInstruction(Protocol):
+    cs_insn: CsInsn
+    address: int
+    size: int
+    mnemonic: str
+    op_str: str
+    groups: Set[int]
+    id: int
+    operands: List[EnhancedOperand]
+    asm_string: str
+    next: int
+    target: int
+    target_string: str | None
+    target_const: bool | None
+    condition: InstructionCondition
+    declare_conditional: bool | None
+    declare_is_unconditional_jump: bool
+    force_unconditional_jump_target: bool
+    annotation: str | None
+    annotation_padding: int | None
+    syscall: int | None
+    syscall_name: str | None
+    causes_branch_delay: bool
+    split: SplitType
+    emulated: bool
+
+    @property
+    def call_like(self) -> bool: ...
+
+    @property
+    def jump_like(self) -> bool: ...
+
+    @property
+    def has_jump_target(self) -> bool: ...
+
+    @property
+    def is_conditional_jump(self) -> bool: ...
+
+    @property
+    def is_unconditional_jump(self) -> bool: ...
+
+    @property
+    def is_conditional_jump_taken(self) -> bool: ...
+
+    @property
+    def bytes(self) -> bytearray: ...
+
+    def op_find(self, op_type: int, position: int) -> EnhancedOperand: ...
+
+    def op_count(self, op_type: int) -> int: ...
+
+
 # This class is used to provide context to an instructions execution, used both
 # in the disasm view output (see 'pwndbg.color.disasm.instruction()'), as well as for
 # Pwndbg commands like "nextcall" that need to know the instructions target to set breakpoints
-class PwndbgInstruction:
-    def __init__(self, cs_insn: CsInsn | None) -> None:
+# The information in this class is backed by metadata from Capstone
+class PwndbgInstructionImpl(PwndbgInstruction):
+    def __init__(self, cs_insn: CsInsn) -> None:
        self.cs_insn: CsInsn = cs_insn
        """
-        The underlying Capstone instruction, if present.
-        Ideally, only the enhancement code will access the 'cs_insn' property
+        The underlying Capstone instruction object.
+        Only the enhancement code should access the 'cs_insn' property
        """

-        # None if Capstone don't support the arch being disassembled
-        # See "make_simple_instruction" function
-        if cs_insn is None:
-            return
-
        self.address: int = cs_insn.address

        self.size: int = cs_insn.size
@@ -173,7 +227,7 @@ class PwndbgInstruction:
        # in pwndbg.aglib.disasm.arch.py
        # ***********

-        self.asm_string: str = "%-06s %s" % (self.mnemonic, self.op_str)
+        self.asm_string: str = f"{self.mnemonic:<6} {self.op_str}"
        """
        The full string representing the instruction - `mov    rdi, rsp` with appropriate padding.

@@ -555,29 +609,100 @@ class EnhancedOperand:
        return f"[{info}]"


-def make_simple_instruction(address: int) -> PwndbgInstruction:
-    """
-    Instantiate a PwndbgInstruction for an architecture that Capstone/pwndbg doesn't support (as defined in the CapstoneArch structure)
-    """
-    ins = pwndbg.dbg.selected_inferior().disasm(address)
-    asm = ins["asm"].split(maxsplit=1)
+# Represents a disassembled instruction
+# Conforms to the PwndbgInstruction interface
+class ManualPwndbgInstruction(PwndbgInstruction):
+    def __init__(self, address: int) -> None:
+        """
+        This class provides an implementation of PwndbgInstruction for cases where the architecture
+        at hand is not supported by the Capstone disassembler. The backing information is sourced from
+        GDB/LLDB's built-in disassemblers.
+
+        Instances of this class do not go through the 'enhancement' process due to lacking important information provided by Capstone.
+        As a result of this, some of the methods raise NotImplementedError, because if they are called it indicates a bug elsewhere in the codebase.
+        """
+        ins: DisassembledInstruction = pwndbg.dbg.selected_inferior().disasm(address)
+        asm = ins["asm"].split(maxsplit=1)
+
+        # The enhancement code assumes this value exists.
+        # However, a ManualPwndbgInstruction should never be used in the enhancement code.
+        self.cs_insn: CsInsn = None
+
+        self.address = address
+        self.size = ins["length"]
+
+        self.mnemonic = asm[0].strip()
+        self.op_str = asm[1].strip() if len(asm) > 1 else ""
+        self.groups = set()
+
+        # Set Capstone ID to -1
+        self.id = -1
+
+        self.operands = []

-    pwn_ins = PwndbgInstruction(None)
-    pwn_ins.address = address
-    pwn_ins.size = ins["length"]
+        self.asm_string = f"{self.mnemonic:<6} {self.op_str}"

-    pwn_ins.mnemonic = asm[0].strip()
-    pwn_ins.op_str = asm[1].strip() if len(asm) > 1 else ""
+        self.next = address + self.size
+        self.target = self.next
+        self.target_string = None
+        self.target_const = None

-    pwn_ins.next = address + pwn_ins.size
-    pwn_ins.target = pwn_ins.next
+        self.condition = InstructionCondition.UNDETERMINED

-    pwn_ins.groups = []
+        self.declare_conditional = None
+        self.declare_is_unconditional_jump = False
+        self.force_unconditional_jump_target = False

-    pwn_ins.condition = InstructionCondition.UNDETERMINED
+        self.annotation = None

-    pwn_ins.annotation = None
+        self.annotation_padding = None

-    pwn_ins.operands = []
+        self.syscall = None
+        self.syscall_name = None

-    return pwn_ins
+        self.causes_branch_delay = False
+
+        self.split = SplitType.NO_SPLIT
+
+        self.emulated = False
+
+    @property
+    def bytes(self) -> bytearray:
+        # GDB simply doesn't provide us with the raw bytes.
+        # However, it is important that this returns a valid bytearray,
+        # since the disasm code indexes this for nearpc-num-opcode-bytes.
+        return bytearray()
+
+    @property
+    def call_like(self) -> bool:
+        return False
+
+    @property
+    def jump_like(self) -> bool:
+        return False
+
+    @property
+    def has_jump_target(self) -> bool:
+        return False
+
+    @property
+    def is_conditional_jump(self) -> bool:
+        return False
+
+    @property
+    def is_unconditional_jump(self) -> bool:
+        return False
+
+    @property
+    def is_conditional_jump_taken(self) -> bool:
+        return False
+
+    @override
+    def op_find(self, op_type: int, position: int) -> EnhancedOperand:
+        # raise NotImplementedError, because if this is called it indicates a bug elsewhere in the codebase.
+        # ManualPwndbgInstruction should not go through the enhancement process, where this would be called.
+        raise NotImplementedError
+
+    @override
+    def op_count(self, op_type: int) -> int:
+        raise NotImplementedError
--- a/pwndbg/dbg/gdb/__init__.py
+++ b/pwndbg/dbg/gdb/__init__.py
@@ -678,6 +678,8 @@ class GDBProcess(pwndbg.dbg_mod.Process):
            arch = gdb.execute("show architecture", to_string=True).strip()
            not_exactly_arch = True

+        arch = arch.lower()
+
        # Below, we fix the fetched architecture
        for match in pwndbg.aglib.arch_mod.ARCHS:
            if match in arch:
--- a/pwndbg/lib/regs.py
+++ b/pwndbg/lib/regs.py
@@ -622,6 +622,54 @@ riscv = RegisterSet(
    retval="a0",
 )

+# https://docs.kernel.org/arch/loongarch/introduction.html
+loongarch64 = RegisterSet(
+    pc="pc",
+    stack="sp",
+    frame="fp",
+    retaddr=("ra",),
+    gpr=(
+        "a0",
+        "a1",
+        "a2",
+        "a3",
+        "a4",
+        "a5",
+        "a6",
+        "a7",
+        "t0",
+        "t1",
+        "t2",
+        "t3",
+        "t4",
+        "t5",
+        "t6",
+        "t7",
+        "t8",
+        "s0",
+        "s1",
+        "s2",
+        "s3",
+        "s4",
+        "s5",
+        "s6",
+        "s7",
+        "s8",
+    ),
+    args=(
+        "a0",
+        "a1",
+        "a2",
+        "a3",
+        "a4",
+        "a5",
+        "a6",
+        "a7",
+    ),
+    # r21 stores "percpu base address", referred to as "u0" in the kernel
+    misc=("tp", "r21"),
+)
+
 reg_sets = {
    "i386": i386,
    "i8086": i386,
@@ -635,4 +683,5 @@ reg_sets = {
    "armcm": armcm,
    "aarch64": aarch64,
    "powerpc": powerpc,
+    "loongarch64": loongarch64,
 }