Annotate all load operations Arm, AArch64, RISCV, and MIPS (#2309)

* _generate_load_annotator * Parent class function for load-instruction annotations. RISCV loads implemented and tested. SPARC load/store instructions noted * Get ARM32 load and store instructions * Add all AArch64 loads and stores * MIPS memory address resolver * AArch64 memory resolver * AArch64 shift operation in memory operands * Arm resolve memory operands and PC special case * Lint * comment * rebase and lint * comment corrections * Fill in arm function maps * extract mips load instructions * lint * Remove unnecessary parameter to enhancement telescope function * Implement signed loads * Now with load code moved to parent, refactor in x86 class * lint * aarch64 read size fix * arm thumb mode pc + 4 * read thumb bit from emu when needed * lint * rebase * lint * rebase * Add load annotator to MIPS * lint * fix last aarch64 register thing * minor fixes
1 year ago · 117a68b7f2
parent 6b85347806
commit 117a68b7f2
7 changed files with 527 additions and 88 deletions
--- a/pwndbg/gdblib/disasm/aarch64.py
+++ b/pwndbg/gdblib/disasm/aarch64.py
@ -20,6 +20,83 @@ from pwndbg.gdblib.disasm.instruction import InstructionCondition
 from pwndbg.gdblib.disasm.instruction import PwndbgInstruction
 from pwndbg.gdblib.disasm.instruction import boolean_to_instruction_condition

+# Negative size indicates signed read
+# None indicates the read size depends on the target register
+AARCH64_SINGLE_LOAD_INSTRUCTIONS: Dict[int, int | None] = {
+    ARM64_INS_LDRB: 1,
+    ARM64_INS_LDURB: 1,
+    ARM64_INS_LDRSB: -1,
+    ARM64_INS_LDURSB: -1,
+    ARM64_INS_LDRH: 2,
+    ARM64_INS_LDURH: 2,
+    ARM64_INS_LDRSH: -2,
+    ARM64_INS_LDURSH: -2,
+    ARM64_INS_LDURSW: -4,
+    ARM64_INS_LDRSW: -4,
+    ARM64_INS_LDUR: None,
+    ARM64_INS_LDR: None,
+    ARM64_INS_LDTRB: 1,
+    ARM64_INS_LDTRSB: -1,
+    ARM64_INS_LDTRH: 2,
+    ARM64_INS_LDTRSH: -2,
+    ARM64_INS_LDTRSW: -4,
+    ARM64_INS_LDTR: None,
+    ARM64_INS_LDXRB: 1,
+    ARM64_INS_LDXRH: 2,
+    ARM64_INS_LDXR: None,
+    ARM64_INS_LDARB: 1,
+    ARM64_INS_LDARH: 2,
+    ARM64_INS_LDAR: None,
+}
+
+AARCH64_SINGLE_STORE_INSTRUCTIONS: Dict[int, int | None] = {
+    ARM64_INS_STRB: 1,
+    ARM64_INS_STURB: 1,
+    ARM64_INS_STRH: 2,
+    ARM64_INS_STURH: 2,
+    ARM64_INS_STUR: None,
+    ARM64_INS_STR: None,
+    # Store Register (unprivileged)
+    ARM64_INS_STTRB: 1,
+    ARM64_INS_STTRH: 2,
+    ARM64_INS_STTR: None,
+    # Store Exclusive
+    ARM64_INS_STXRB: 1,
+    ARM64_INS_STXRH: 2,
+    ARM64_INS_STXR: None,
+    # Store-Release
+    ARM64_INS_STLRB: 1,
+    ARM64_INS_STLRH: 2,
+    ARM64_INS_STLR: None,
+    # Store-Release Exclusive
+    ARM64_INS_STLXRB: 1,
+    ARM64_INS_STLXRH: 2,
+    ARM64_INS_STLXR: None,
+}
+
+# Parameters to each function: (value, shift_amt, bit_width)
+AARCH64_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = {
+    ARM64_SFT_LSL: bit_math.logical_shift_left,
+    ARM64_SFT_LSR: bit_math.logical_shift_right,
+    ARM64_SFT_ASR: bit_math.arithmetic_shift_right,
+    ARM64_SFT_ROR: bit_math.rotate_right,
+}
+
+
+# These are "Extend" operations - https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912
+# They take in a number, extract a byte, halfword, or word,
+# and perform a zero- or sign-extend operation.
+AARCH64_EXTEND_MAP: Dict[int, Callable[[int], int]] = {
+    ARM64_EXT_UXTB: lambda x: x & ((1 << 8) - 1),
+    ARM64_EXT_UXTH: lambda x: x & ((1 << 16) - 1),
+    ARM64_EXT_UXTW: lambda x: x & ((1 << 32) - 1),
+    ARM64_EXT_UXTX: lambda x: x,  # UXTX has no effect. It extracts 64-bits from a 64-bit register.
+    ARM64_EXT_SXTB: lambda x: bit_math.to_signed(x, 8),
+    ARM64_EXT_SXTH: lambda x: bit_math.to_signed(x, 16),
+    ARM64_EXT_SXTW: lambda x: bit_math.to_signed(x, 32),
+    ARM64_EXT_SXTX: lambda x: bit_math.to_signed(x, 64),
+}
+

 def resolve_condition(condition: int, cpsr: int) -> InstructionCondition:
    """
@ -56,29 +133,6 @@ def resolve_condition(condition: int, cpsr: int) -> InstructionCondition:
    return InstructionCondition.TRUE if condition else InstructionCondition.FALSE


-# Parameters to each function: (value, shift_amt, bit_width)
-AARCH64_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = {
-    ARM64_SFT_LSL: bit_math.logical_shift_left,
-    ARM64_SFT_LSR: bit_math.logical_shift_right,
-    ARM64_SFT_ASR: bit_math.arithmetic_shift_right,
-    ARM64_SFT_ROR: bit_math.rotate_right,
-}
-
-# These are "Extend" operations - https://devblogs.microsoft.com/oldnewthing/20220728-00/?p=106912
-# They take in a number, extract a byte, halfword, or word,
-# and perform a zero- or sign-extend operation.
-AARCH64_EXTEND_MAP: Dict[int, Callable[[int], int]] = {
-    ARM64_EXT_UXTB: lambda x: x & ((1 << 8) - 1),
-    ARM64_EXT_UXTH: lambda x: x & ((1 << 16) - 1),
-    ARM64_EXT_UXTW: lambda x: x & ((1 << 32) - 1),
-    ARM64_EXT_UXTX: lambda x: x,  # UXTX has no effect. It extracts 64-bits from a 64-bit register.
-    ARM64_EXT_SXTB: lambda x: bit_math.to_signed(x, 8),
-    ARM64_EXT_SXTH: lambda x: bit_math.to_signed(x, 16),
-    ARM64_EXT_SXTW: lambda x: bit_math.to_signed(x, 32),
-    ARM64_EXT_SXTX: lambda x: bit_math.to_signed(x, 64),
-}
-
-
 class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
    def __init__(self, architecture: str) -> None:
        super().__init__(architecture)
@ -90,8 +144,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
            ARM64_INS_ADR: self._common_generic_register_destination,
            # ADRP
            ARM64_INS_ADRP: self._common_generic_register_destination,
-            # LDR
-            ARM64_INS_LDR: self._common_generic_register_destination,
            # ADD
            ARM64_INS_ADD: self._common_generic_register_destination,
            # SUB
@ -108,6 +160,26 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
            ARM64_INS_CCMN: self._common_cmp_annotator_builder("cpsr", ""),
        }

+    @override
+    def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
+        """
+        Set the annotation for `instruction`.
+
+        Loads listed in AARCH64_SINGLE_LOAD_INSTRUCTIONS are handed to
+        `_common_load_annotator`. Every other instruction is dispatched through
+        `self.annotation_handlers`; an instruction with no registered handler gets a no-op.
+        """
+        # Dispatch to the correct handler
+        if instruction.id in AARCH64_SINGLE_LOAD_INSTRUCTIONS:
+            # `_register_width` reports bits; the load annotator works in bytes
+            target_reg_size = self._register_width(instruction, instruction.operands[0]) // 8
+            # A table entry of None means the read size is the size of the destination register.
+            # Negative entries mark signed reads, hence the abs()/`< 0` split below.
+            read_size = AARCH64_SINGLE_LOAD_INSTRUCTIONS[instruction.id] or target_reg_size
+
+            self._common_load_annotator(
+                instruction,
+                emu,
+                instruction.operands[1].before_value,
+                abs(read_size),
+                read_size < 0,
+                target_reg_size,
+                instruction.operands[0].str,
+                instruction.operands[1].str,
+            )
+        else:
+            self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu)
+
+
    @override
    def _condition(
        self, instruction: PwndbgInstruction, emu: Emulator
@ -164,9 +236,58 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
        return super()._resolve_target(instruction, emu, call)

    @override
-    def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
-        # Dispatch to the correct handler
-        self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu)
+    def _parse_memory(
+        self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
+    ) -> int | None:
+        """
+        Parse the `Arm64OpMem` Capstone object to determine the concrete memory address used.
+
+        Three types of AArch64 memory operands:
+        1. Register base with optional immediate offset
+        Examples:
+              ldrb   w3, [x2]
+              str    x1, [x2, #0xb58]
+              ldr x4,[x3], 4
+        2. Register + another register with an optional shift
+        Examples:
+              ldrb   w1, [x9, x2]
+              str x1, [x2, x0, lsl #3]
+        3. Register + 32-bit register extended and shifted.
+        The shift in this case is implicitly a LSL
+        Examples:
+              ldr x1, [x2, w22, UXTW #3]
+
+        """
+
+        target = 0
+
+        # All memory operands have `base` defined
+        base = self._read_register(instruction, op.mem.base, emu)
+        if base is None:
+            return None
+        target = base + op.mem.disp
+
+        # If there is an index register
+        if op.mem.index != 0:
+            index = self._read_register(instruction, op.mem.index, emu)
+            if index is None:
+                return None
+
+            # Optionally apply an extend to the index register
+            if op.cs_op.ext != 0:
+                index = AARCH64_EXTEND_MAP[op.cs_op.ext](index)
+
+            # Optionally apply shift to the index register
+            # This handles shifts in the extend operation as well:
+            # As in the case of `ldr x1, [x2, w22, UXTW #3]`,
+            # Capstone will automatically make the shift a LSL and set the value to 3
+            if op.cs_op.shift.type != 0:
+                # Instruction forms with a shift always apply the shift to a 64-bit value
+                index = AARCH64_BIT_SHIFT_MAP[op.cs_op.shift.type](index, op.cs_op.shift.value, 64)
+
+            target += index
+
+        return target

    def _register_width(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> int:
        return 32 if instruction.cs_insn.reg_name(op.reg)[0] == "w" else 64
--- a/pwndbg/gdblib/disasm/arch.py
+++ b/pwndbg/gdblib/disasm/arch.py
@ -10,6 +10,7 @@ from capstone import *  # noqa: F403
 import pwndbg.chain
 import pwndbg.color.context as C
 import pwndbg.color.memory as MemoryColor
+import pwndbg.color.message as MessageColor
 import pwndbg.color.syntax_highlight as H
 import pwndbg.enhance
 import pwndbg.gdblib.memory
@ -18,6 +19,7 @@ import pwndbg.gdblib.symbol
 import pwndbg.gdblib.typeinfo
 import pwndbg.gdblib.vmmap
 import pwndbg.lib.config
+import pwndbg.lib.disasm.helpers as bit_math
 from pwndbg.emu.emulator import Emulator
 from pwndbg.gdblib.disasm.instruction import FORWARD_JUMP_GROUP
 from pwndbg.gdblib.disasm.instruction import EnhancedOperand
@ -394,10 +396,9 @@ class DisassemblyAssistant:
        address: int,
        size: int,
        instruction: PwndbgInstruction,
-        operand: EnhancedOperand,
        emu: Emulator,
    ) -> int | None:
-        address_list = self._telescope(address, 1, instruction, operand, emu, read_size=size)
+        address_list = self._telescope(address, 1, instruction, emu, read_size=size)

        if len(address_list) >= 2:
            return address_list[1]
@ -426,7 +427,7 @@ class DisassemblyAssistant:
        elif operand.type == CS_OP_MEM:
            # Assume that we are reading ptrsize - subclasses should override this function
            # to provide a more specific value if needed
-            self._read_memory(value, pwndbg.gdblib.arch.ptrsize, instruction, operand, emu)
+            self._read_memory(value, pwndbg.gdblib.arch.ptrsize, instruction, emu)

        return None

@ -435,7 +436,6 @@ class DisassemblyAssistant:
        address: int,
        limit: int,
        instruction: PwndbgInstruction,
-        operand: EnhancedOperand,
        emu: Emulator,
        read_size: int = None,
    ) -> List[int]:
@ -470,7 +470,7 @@ class DisassemblyAssistant:

            else:
                return pwndbg.chain.get(address, limit=limit)
-        elif not can_read_process_state or operand.type == CS_OP_IMM:
+        else:
            # If the target address is in a non-writeable map, we can pretty safely telescope
            # This is best-effort to give a better experience

@ -746,7 +746,6 @@ class DisassemblyAssistant:
                left.after_value,
                TELESCOPE_DEPTH + 1,
                instruction,
-                left,
                emu,
                read_size=pwndbg.gdblib.arch.ptrsize,
            )
@ -801,5 +800,79 @@ class DisassemblyAssistant:

        return handler

+    def _common_load_annotator(
+        self,
+        instruction: PwndbgInstruction,
+        emu: Emulator,
+        address: int | None,
+        read_size: int,
+        signed: bool,
+        target_size: int,
+        dest_str: str,
+        source_str: str,
+    ) -> None:
+        """
+        This function annotates load instructions - moving data from memory into a register.
+
+        These instructions read `read_size` bytes from memory into a register.
+
+        `signed`: whether or not we are loading a signed value from memory
+        `target_size`: the size of the register in bytes - relevant for sign-extension
+        `dest_str`: a string representing the destination register ('rax')
+        `source_str`: a string representing the source address ('[0x7fffffffe138]')
+        """
+
+        if address is None:
+            return
+
+        # There are many cases we need to consider when we are loading a value from memory
+        # Were we able to reason about the memory address, and dereference it?
+        # Does the resolved memory address actually point into memory?
+        # If the target register size is larger than the read size, then do we need sign-extension?
+
+        # If the address is not mapped, we segfaulted
+        if not pwndbg.gdblib.memory.peek(address):
+            instruction.annotation = MessageColor.error(
+                f"<Cannot dereference [{MemoryColor.get(address)}]>"
+            )
+        else:
+            # In this branch, it is assumed that the address IS in a mapped page
+            TELESCOPE_DEPTH = max(1, int(pwndbg.config.disasm_telescope_depth))
+
+            telescope_addresses = self._telescope(
+                address,
+                TELESCOPE_DEPTH,
+                instruction,
+                emu,
+                read_size=read_size,
+            )
+
+            if len(telescope_addresses) == 1:
+                # If telescope returned only 1 address (and we already know the address is in a mapped page)
+                # it means we couldn't reason about the dereferenced memory.
+                # In this case, simply display the address
+
+                # As an example, this path is taken for the following case:
+                # mov rdi, qword ptr [rip + 0x17d40] where the resolved memory address is in writeable memory,
+                # and we are not emulating. This means we cannot safely dereference if PC is not at the current instruction address,
+                # because the memory address could have been written to by the time the instruction executes
+                telescope_print = None
+            else:
+                if signed and read_size != target_size and len(telescope_addresses) == 2:
+                    # We sign extend the value, then convert it back to the unsigned bit representation
+                    final_value = bit_math.to_signed(telescope_addresses[1], read_size * 8) & (
+                        (1 << (target_size * 8)) - 1
+                    )
+                    # If it's a signed read that required extension, it will just be a number with no special symbol/color needed
+                    telescope_print = hex(final_value)
+                else:
+                    # Start showing at dereferenced address, hence the [1:]
+                    telescope_print = f"{self._telescope_format_list(telescope_addresses[1:], TELESCOPE_DEPTH, emu)}"
+
+            instruction.annotation = f"{dest_str}, {source_str}"
+
+            if telescope_print is not None:
+                instruction.annotation += f" => {telescope_print}"
+

 generic_assistant = DisassemblyAssistant(None)
--- a/pwndbg/gdblib/disasm/arm.py
+++ b/pwndbg/gdblib/disasm/arm.py
@ -5,6 +5,7 @@ from typing import Dict

 from capstone import *  # noqa: F403
 from capstone.arm import *  # noqa: F403
+from pwnlib.util.misc import align_down
 from typing_extensions import override

 import pwndbg.gdblib.arch
@ -13,6 +14,7 @@ import pwndbg.gdblib.memory
 import pwndbg.gdblib.regs
 import pwndbg.lib.disasm.helpers as bit_math
 from pwndbg.emu.emulator import Emulator
+from pwndbg.gdblib.arch import read_thumb_bit as process_read_thumb_bit
 from pwndbg.gdblib.disasm.instruction import EnhancedOperand
 from pwndbg.gdblib.disasm.instruction import InstructionCondition
 from pwndbg.gdblib.disasm.instruction import PwndbgInstruction
@ -25,6 +27,34 @@ ARM_BIT_SHIFT_MAP: Dict[int, Callable[[int, int, int], int]] = {
    ARM_SFT_ROR: bit_math.rotate_right,
 }

+ARM_SINGLE_LOAD_INSTRUCTIONS = {
+    ARM_INS_LDRB: 1,
+    ARM_INS_LDRSB: -1,
+    ARM_INS_LDRH: 2,
+    ARM_INS_LDRSH: -2,
+    ARM_INS_LDR: 4,
+    ARM_INS_LDRBT: 1,
+    ARM_INS_LDRSBT: -1,
+    ARM_INS_LDRHT: 2,
+    ARM_INS_LDRSHT: -2,
+    ARM_INS_LDRT: 4,
+    ARM_INS_LDREXB: 1,
+    ARM_INS_LDREXH: 2,
+    ARM_INS_LDREX: 4,
+}
+
+ARM_SINGLE_STORE_INSTRUCTIONS = {
+    ARM_INS_STRB: 1,
+    ARM_INS_STRH: 2,
+    ARM_INS_STR: 4,
+    ARM_INS_STRBT: 1,
+    ARM_INS_STRHT: 2,
+    ARM_INS_STRT: 4,
+    ARM_INS_STREXB: 1,
+    ARM_INS_STREXH: 2,
+    ARM_INS_STREX: 4,
+}
+

 class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
    def __init__(self, architecture: str) -> None:
@ -43,7 +73,19 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):

    @override
    def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
-        # Dispatch to the correct handler
+        if instruction.id in ARM_SINGLE_LOAD_INSTRUCTIONS:
+            read_size = ARM_SINGLE_LOAD_INSTRUCTIONS[instruction.id]
+            self._common_load_annotator(
+                instruction,
+                emu,
+                instruction.operands[1].before_value,
+                abs(read_size),
+                read_size < 0,
+                4,
+                instruction.operands[0].str,
+                instruction.operands[1].str,
+            )
+        else:
            self.annotation_handlers.get(instruction.id, lambda *a: None)(instruction, emu)

    @override
@ -99,8 +141,8 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
            target = target & ~1
        return target

-    @override
-    def _memory_string(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> str:
+    # Currently not used
+    def _memory_string_old(self, instruction: PwndbgInstruction, op: EnhancedOperand) -> str:
        parts = []

        if op.mem.base != 0:
@ -116,10 +158,79 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):

        return f"[{(', '.join(parts))}]"

+    def read_thumb_bit(self, instruction: PwndbgInstruction, emu: Emulator) -> int | None:
+        """
+        Return the Thumb bit to use when interpreting `instruction`.
+
+        The emulator is asked first when one is provided. Otherwise the bit is read from
+        the live process, but only if process state can be reasoned about for this
+        instruction. If neither source is usable, 0 is returned.
+        """
+        if emu:
+            return emu.read_thumb_bit()
+        elif self.can_reason_about_process_state(instruction):
+            # Read the Thumb bit directly from the process flag register if we can
+            return process_read_thumb_bit()
+        else:
+            # No trustworthy source for the bit - fall back to 0
+            return 0
+
    @override
    def _immediate_string(self, instruction, operand):
        return "#" + super()._immediate_string(instruction, operand)

+    @override
+    def _read_register(
+        self, instruction: PwndbgInstruction, operand_id: int, emu: Emulator
+    ) -> int | None:
+        # When `pc` is referenced in an operand (typically in a memory operand), the value it takes on
+        # is `pc_at_instruction + 8`. In Thumb mode, you only add 4 to the instruction address.
+        if operand_id == ARM_REG_PC:
+            return instruction.address + (4 if self.read_thumb_bit(instruction, emu) else 8)
+
+        return super()._read_register(instruction, operand_id, emu)
+
+    @override
+    def _parse_memory(
+        self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
+    ) -> int | None:
+        """
+        Parse the `ArmOpMem` Capstone object to determine the concrete memory address used.
+
+        Types of memory operands:
+            [Rn]
+            [Rn, #imm]
+            [Rn, Rm]
+            [Rn, Rm, <shift> #imm]
+
+        Capstone represents the object a bit differently than AArch64 to align with the underlying architecture of Arm.
+
+        This representation will change in Capstone 6:
+            https://github.com/capstone-engine/capstone/issues/2281
+            https://github.com/capstone-engine/capstone/pull/1949
+        """
+
+        target = 0
+
+        # All memory operands have `base` defined
+        base = self._read_register(instruction, op.mem.base, emu)
+        if base is None:
+            return None
+
+        if op.mem.base == ARM_REG_PC:
+            # The PC as the base register is a special case - it will align the address to a word (32-bit) boundary
+            # Explanation: https://stackoverflow.com/a/29588678
+            # See "Operation" at the bottom of https://developer.arm.com/documentation/ddi0597/2024-03/Base-Instructions/LDR--literal---Load-Register--literal--
+            base = align_down(4, base)
+
+        target = base + op.mem.disp
+
+        # If there is an index register
+        if op.mem.index != 0:
+            index = self._read_register(instruction, op.mem.index, emu)
+            if index is None:
+                return None
+
+            # Optionally apply shift to the index register
+            if op.cs_op.shift.type != 0:
+                index = ARM_BIT_SHIFT_MAP[op.cs_op.shift.type](index, op.cs_op.shift.value, 32)
+
+            target += index * (-1 if op.cs_op.subtracted else 1)
+
+        return target
+
    @override
    def _parse_register(
        self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
--- a/pwndbg/gdblib/disasm/mips.py
+++ b/pwndbg/gdblib/disasm/mips.py
@ -75,23 +75,9 @@ MIPS_SIMPLE_DESTINATION_INSTRUCTIONS = {
    MIPS_INS_DCLZ,
    MIPS_INS_DSUB,
    MIPS_INS_DSUBU,
-    MIPS_INS_LB,
-    MIPS_INS_LBU,
-    MIPS_INS_LD,
-    MIPS_INS_LDL,
-    MIPS_INS_LDPC,
-    MIPS_INS_LDR,
-    MIPS_INS_LH,
-    MIPS_INS_LHU,
    MIPS_INS_LSA,
    MIPS_INS_DLSA,
    MIPS_INS_LUI,
-    MIPS_INS_LW,
-    MIPS_INS_LWL,
-    MIPS_INS_LWPC,
-    MIPS_INS_LWR,
-    MIPS_INS_LWU,
-    MIPS_INS_LWUPC,
    MIPS_INS_MFHI,
    MIPS_INS_MFLO,
    MIPS_INS_SEB,
@ -105,6 +91,26 @@ MIPS_SIMPLE_DESTINATION_INSTRUCTIONS = {
    MIPS_INS_SLTI,
    MIPS_INS_SLTIU,
    MIPS_INS_SLTU,
+    # Rare - unaligned read - have complex loading logic
+    MIPS_INS_LDL,
+    MIPS_INS_LDR,
+    # Rare - partial load on portions of address
+    MIPS_INS_LWL,
+    MIPS_INS_LWR,
+}
+
+# All MIPS load instructions, mapped to the number of bytes they read.
+# Negative size indicates a signed (sign-extending) read; positive is zero-extending.
+# The sign only matters when the read is narrower than the destination register,
+# e.g. LW on MIPS64. LD/LDPC fill the whole 64-bit register, so they stay positive.
+MIPS_LOAD_INSTRUCTIONS = {
+    MIPS_INS_LB: -1,
+    MIPS_INS_LBU: 1,
+    MIPS_INS_LH: -2,
+    MIPS_INS_LHU: 2,
+    MIPS_INS_LW: -4,
+    MIPS_INS_LWU: 4,
+    MIPS_INS_LWPC: -4,
+    MIPS_INS_LWUPC: 4,
+    MIPS_INS_LD: 8,
+    MIPS_INS_LDPC: 8,
 }


@ -115,7 +121,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):

    @override
    def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
-        if instruction.id in MIPS_SIMPLE_DESTINATION_INSTRUCTIONS:
+        if instruction.id in MIPS_LOAD_INSTRUCTIONS:
+            read_size = MIPS_LOAD_INSTRUCTIONS[instruction.id]
+
+            self._common_load_annotator(
+                instruction,
+                emu,
+                instruction.operands[1].before_value,
+                abs(read_size),
+                read_size < 0,
+                pwndbg.gdblib.arch.ptrsize,
+                instruction.operands[0].str,
+                instruction.operands[1].str,
+            )
+        elif instruction.id in MIPS_SIMPLE_DESTINATION_INSTRUCTIONS:
            self._common_generic_register_destination(instruction, emu)

    @override
@ -152,5 +171,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):

        return super()._resolve_target(instruction, emu, call)

+    @override
+    def _parse_memory(
+        self,
+        instruction: PwndbgInstruction,
+        op: pwndbg.gdblib.disasm.arch.EnhancedOperand,
+        emu: Emulator,
+    ) -> int | None:
+        """
+        Parse the `MipsOpMem` Capstone object to determine the concrete memory address used.
+        """
+        base = self._read_register(instruction, op.mem.base, emu)
+        if base is None:
+            return None
+        return base + op.mem.disp
+

 assistant = DisassemblyAssistant("mips")
--- a/pwndbg/gdblib/disasm/riscv.py
+++ b/pwndbg/gdblib/disasm/riscv.py
@ -4,20 +4,107 @@ from capstone import *  # noqa: F403
 from capstone.riscv import *  # noqa: F403
 from typing_extensions import override

+import pwndbg.color.memory as MemoryColor
 import pwndbg.gdblib.arch
 import pwndbg.gdblib.disasm.arch
 import pwndbg.gdblib.regs
 import pwndbg.lib.disasm.helpers as bit_math
 from pwndbg.emu.emulator import Emulator
+from pwndbg.gdblib.disasm.instruction import EnhancedOperand
 from pwndbg.gdblib.disasm.instruction import InstructionCondition
 from pwndbg.gdblib.disasm.instruction import PwndbgInstruction

+RISCV_LOAD_INSTRUCTIONS = {
+    # Sign-extend loads
+    RISCV_INS_LB: -1,
+    RISCV_INS_LH: -2,
+    RISCV_INS_LW: -4,
+    # Zero-extend loads
+    RISCV_INS_LBU: 1,
+    RISCV_INS_LHU: 2,
+    RISCV_INS_LWU: 4,
+    RISCV_INS_LD: 8,
+}
+
+# Due to a bug in Capstone, these instructions have incorrect operands to represent a memory address.
+# So we temporarily separate them to handle them differently
+# This will be fixed in Capstone 6 - https://github.com/capstone-engine/capstone/pull/2393
+# TODO: remove this when updating to Capstone 6
+RISCV_COMPRESSED_LOAD_INSTRUCTIONS = {RISCV_INS_C_LW: -4, RISCV_INS_C_LD: 8, RISCV_INS_C_LDSP: 8}
+
+RISCV_STORE_INSTRUCTIONS = {
+    RISCV_INS_SB: 1,
+    RISCV_INS_SH: 2,
+    RISCV_INS_SW: 4,
+    RISCV_INS_SD: 8,
+}
+
+# TODO: remove this when updating to Capstone 6
+RISCV_COMPRESSED_STORE_INSTRUCTIONS = {
+    RISCV_INS_C_SW: 4,
+    RISCV_INS_C_SD: 8,
+}
+

 class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
    def __init__(self, architecture) -> None:
        super().__init__(architecture)
        self.architecture = architecture

+    @override
+    def _set_annotation_string(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
+        if instruction.id in RISCV_LOAD_INSTRUCTIONS:
+            read_size = RISCV_LOAD_INSTRUCTIONS[instruction.id]
+            self._common_load_annotator(
+                instruction,
+                emu,
+                instruction.operands[1].before_value,
+                abs(read_size),
+                read_size < 0,
+                pwndbg.gdblib.arch.ptrsize,
+                instruction.operands[0].str,
+                instruction.operands[1].str,
+            )
+
+        # TODO: remove this when updating to Capstone 6
+        if instruction.id in RISCV_COMPRESSED_LOAD_INSTRUCTIONS:
+            # We need to manually resolve this now since Capstone doesn't properly represent
+            # memory operands for compressed instructions.
+            address = self._resolve_compressed_target_addr(instruction, emu)
+            if address is not None:
+                read_size = RISCV_COMPRESSED_LOAD_INSTRUCTIONS[instruction.id]
+
+                dest_str = f"[{MemoryColor.get_address_or_symbol(address)}]"
+
+                self._common_load_annotator(
+                    instruction,
+                    emu,
+                    address,
+                    abs(read_size),
+                    read_size < 0,
+                    pwndbg.gdblib.arch.ptrsize,
+                    instruction.operands[0].str,
+                    dest_str,
+                )
+
+        return super()._set_annotation_string(instruction, emu)
+
+    def _resolve_compressed_target_addr(
+        self, instruction: PwndbgInstruction, emu: Emulator
+    ) -> int | None:
+        """
+        Calculate the address used in a compressed load/store instruction.
+        None if address cannot be resolved.
+
+        TODO: remove this when updating to Capstone 6
+        """
+        _, disp, reg = instruction.operands
+
+        if disp.before_value is None or reg.before_value is None:
+            return None
+
+        return disp.before_value + reg.before_value
+
    def _is_condition_taken(
        self, instruction: PwndbgInstruction, emu: Emulator | None
    ) -> InstructionCondition:
@ -102,6 +189,18 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):

        return super()._resolve_target(instruction, emu, call)

+    @override
+    def _parse_memory(
+        self, instruction: PwndbgInstruction, op: EnhancedOperand, emu: Emulator
+    ) -> int | None:
+        """
+        Parse the `RISCVOpMem` Capstone object to determine the concrete memory address used.
+        """
+        base = self._read_register(instruction, op.mem.base, emu)
+        if base is None:
+            return None
+        return base + op.mem.disp
+

 assistant_rv32 = DisassemblyAssistant("rv32")
 assistant_rv64 = DisassemblyAssistant("rv64")
--- a/pwndbg/gdblib/disasm/sparc.py
+++ b/pwndbg/gdblib/disasm/sparc.py
@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from capstone.sparc import *  # noqa: F403
+
+# Instruction groups for future use
+# Values are the number of bytes accessed.
+# Negative size indicates a signed (sign-extending) read.
+SPARC_LOAD_INSTRUCTIONS = {
+    SPARC_INS_LDUB: 1,
+    SPARC_INS_LDSB: -1,
+    SPARC_INS_LDUH: 2,
+    SPARC_INS_LDSH: -2,
+    SPARC_INS_LD: 4,
+    SPARC_INS_LDD: 8,
+}
+
+SPARC_STORE_INSTRUCTIONS = {
+    SPARC_INS_STB: 1,
+    SPARC_INS_STH: 2,
+    SPARC_INS_ST: 4,
+    SPARC_INS_STD: 8,
+}
--- a/pwndbg/gdblib/disasm/x86.py
+++ b/pwndbg/gdblib/disasm/x86.py
@ -79,8 +79,20 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
    def handle_mov(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        left, right = instruction.operands

-        # Read from right operand
-        if right.before_value is not None:
+        # If this is a LOAD operation - MOV REG, [MEM]
+        if left.type == CS_OP_REG and right.type == CS_OP_MEM:
+            self._common_load_annotator(
+                instruction,
+                emu,
+                right.before_value,
+                right.cs_op.size,
+                False,
+                right.cs_op.size,
+                left.str,
+                right.str,
+            )
+        # Handle other cases of MOV
+        elif right.before_value is not None:
            TELESCOPE_DEPTH = max(0, int(pwndbg.config.disasm_telescope_depth))

            # +1 to ensure we telescope enough to read at least one address for the last "elif" below
@ -88,7 +100,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
                right.before_value,
                TELESCOPE_DEPTH + 1,
                instruction,
-                right,
                emu,
                read_size=right.cs_op.size,
            )
@ -111,36 +122,6 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
            elif left.type == CS_OP_REG and right.type in (CS_OP_REG, CS_OP_IMM):
                instruction.annotation = f"{left.str} => {super()._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)}"

-            # MOV REG, [MEM]
-            elif left.type == CS_OP_REG and right.type == CS_OP_MEM:
-                # There are many cases we need to consider if there is a mov from a dereference memory location into a register
-                # Were we able to reason about the memory address, and dereference it?
-                # Does the resolved memory address actual point into memory?
-
-                # right.before_value should be a pointer in this context. If we telescoped and still returned just the value itself,
-                # it indicates that the dereference likely segfaults
-
-                if not pwndbg.gdblib.memory.peek(right.before_value):
-                    telescope_print = MessageColor.error(
-                        f"<Cannot dereference [{MemoryColor.get(right.before_value)}]>"
-                    )
-                elif len(telescope_addresses) == 1:
-                    # If only one address, and we didn't telescope, it means we couldn't reason about the dereferenced memory
-                    # Simply display the address
-
-                    # As an example, this path is taken for the following case:
-                    # mov rdi, qword ptr [rip + 0x17d40] where the resolved memory address is in writeable memory,
-                    # and we are not emulating. This means we cannot savely dereference (if PC is not at the current instruction address)
-                    telescope_print = None
-                else:
-                    # Start showing at dereferenced by, hence the [1:]
-                    telescope_print = f"{super()._telescope_format_list(telescope_addresses[1:], TELESCOPE_DEPTH, emu)}"
-
-                if telescope_print is not None:
-                    instruction.annotation = f"{left.str}, {right.str} => {telescope_print}"
-                else:
-                    instruction.annotation = f"{left.str}, {right.str}"
-
    def handle_vmovaps(self, instruction: PwndbgInstruction, emu: Emulator) -> None:
        # If the source or destination is in memory, it must be aligned to:
        #  16 bytes for SSE, 32 bytes for AVX, 64 bytes for AVX-512
@ -172,7 +153,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):

        if right.before_value is not None:
            telescope_addresses = super()._telescope(
-                right.before_value, TELESCOPE_DEPTH, instruction, right, emu
+                right.before_value, TELESCOPE_DEPTH, instruction, emu
            )
            instruction.annotation = f"{left.str} => {super()._telescope_format_list(telescope_addresses, TELESCOPE_DEPTH, emu)}"

@ -281,7 +262,7 @@ class DisassemblyAssistant(pwndbg.gdblib.disasm.arch.DisassemblyAssistant):
            return None

        if operand.type == CS_OP_MEM:
-            return self._read_memory(value, operand.cs_op.size, instruction, operand, emu)
+            return self._read_memory(value, operand.cs_op.size, instruction, emu)
        else:
            return super()._resolve_used_value(value, instruction, operand, emu)