Improving `kmod` (#3254)

* -

* improving kmod

* bug fix

* added helpers for finding offsets

* improved helpers for determining offsets

* improving kmod helpers

* refactoring + handling kallsyms

* recovering from stash

* doc + test

* fixes based on comments

* improvements
pull/3262/head
jxuanli 4 months ago committed by GitHub
parent 3d3097669f
commit 41c8bc734b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -2,7 +2,7 @@
# kmod # kmod
```text ```text
usage: kmod [-h] [module_name] usage: kmod [-h] [-l PATH] [module_name]
``` ```
@ -18,6 +18,7 @@ Displays the loaded Linux kernel modules.
|Short|Long|Help| |Short|Long|Help|
| :--- | :--- | :--- | | :--- | :--- | :--- |
|-h|--help|show this help message and exit| |-h|--help|show this help message and exit|
|-l|--load|the path of the module to load|
<!-- END OF AUTOGENERATED PART. Do not modify this line or the line below, they mark the end of the auto-generated part of the file. If you want to extend the documentation in a way which cannot easily be done by adding to the command help description, write below the following line. --> <!-- END OF AUTOGENERATED PART. Do not modify this line or the line below, they mark the end of the auto-generated part of the file. If you want to extend the documentation in a way which cannot easily be done by adding to the command help description, write below the following line. -->
<!-- ------------\>8---- ----\>8---- ----\>8------------ --> <!-- ------------\>8---- ----\>8---- ----\>8------------ -->

@ -648,18 +648,24 @@ def num_numa_nodes() -> int:
def node_data() -> pwndbg.dbg_mod.Value: def node_data() -> pwndbg.dbg_mod.Value:
if arch_symbols() is not None: if (syms := arch_symbols()) is not None:
return arch_symbols().node_data() return syms.node_data()
return None return None
def slab_caches() -> pwndbg.dbg_mod.Value: def slab_caches() -> pwndbg.dbg_mod.Value:
if arch_symbols() is not None: if (syms := arch_symbols()) is not None:
return arch_symbols().slab_caches() return syms.slab_caches()
return None return None
def per_cpu_offset() -> pwndbg.dbg_mod.Value: def per_cpu_offset() -> pwndbg.dbg_mod.Value:
if arch_symbols() is not None: if (syms := arch_symbols()) is not None:
return arch_symbols().per_cpu_offset() return syms.per_cpu_offset()
return None
def modules() -> pwndbg.dbg_mod.Value:
if (syms := arch_symbols()) is not None:
return syms.modules()
return None return None

@ -12,6 +12,7 @@ from pwnlib.util.packing import u64
import pwndbg.aglib import pwndbg.aglib
import pwndbg.aglib.kernel import pwndbg.aglib.kernel
import pwndbg.aglib.kernel.kmod
import pwndbg.aglib.memory import pwndbg.aglib.memory
import pwndbg.color.message as M import pwndbg.color.message as M
import pwndbg.commands import pwndbg.commands
@ -80,6 +81,8 @@ class Kallsyms:
self.names = self.find_names() self.names = self.find_names()
self.kernel_addresses = self.get_kernel_addresses() self.kernel_addresses = self.get_kernel_addresses()
self.parse_symbol_table() self.parse_symbol_table()
for sym_name, sym_addr, sym_type in pwndbg.aglib.kernel.kmod.all_modules_kallsyms():
self.kallsyms[sym_name] = (sym_addr, sym_type)
print(M.info(f"Found {len(self.kallsyms)} ksymbols")) print(M.info(f"Found {len(self.kallsyms)} ksymbols"))
def find_token_table(self) -> int: def find_token_table(self) -> int:

@ -0,0 +1,221 @@
from __future__ import annotations
from enum import Enum
from typing import List
from typing import Tuple
import pwndbg
import pwndbg.color.message as M
import pwndbg.commands
from pwndbg.aglib.kernel.macros import for_each_entry
class mod_mem_type(Enum):
    """
    Kinds of module memory region; see `enum mod_mem_type` in kernel source.

    The runtime memory footprint of a module is calculated by summing the sizes
    of MOD_TEXT, MOD_DATA, MOD_RODATA and MOD_RO_AFTER_INIT, which excludes the
    initialization sections that are freed after the module load.
    """

    MOD_TEXT = 0
    MOD_DATA = 1
    MOD_RODATA = 2
    MOD_RO_AFTER_INIT = 3  # might be empty
    # The init-only kinds are left out on purpose:
    #   MOD_INIT_TEXT,
    #   MOD_INIT_DATA,
    #   MOD_INIT_RODATA,
    MOD_MEM_NUM_TYPES = 4
# TODO: handle potential negative offsets when CONFIG_RANDSTRUCT=y
@pwndbg.lib.cache.cache_until("stop")
def module_name_offset():
    """
    Heuristically locate the `name` field of a module entry.

    Starting from the first pointer stored at the `modules` list head, every
    pointer-aligned offset (up to 0x100 slots) is probed, and the first offset
    at which a string of at least two characters can be read and decoded as
    ASCII is returned.

    Returns None, after printing a warning, when the list head is unavailable
    or no offset qualifies.
    """
    list_head = pwndbg.aglib.kernel.modules()
    if list_head is None:
        print(M.warn("Could not find modules"))
        return None
    first_entry = pwndbg.aglib.memory.read_pointer_width(int(list_head))
    for slot in range(0x100):
        candidate = slot * pwndbg.aglib.arch.ptrsize
        try:
            text = pwndbg.aglib.memory.string(first_entry + candidate).decode("ascii")
        except Exception:
            # any failure to read or decode means this slot is not the name
            continue
        if len(text) >= 2:
            return candidate
    print(M.warn("Could not find module->name"))
    return None
@pwndbg.lib.cache.cache_until("stop")
def module_mem_offset() -> Tuple[int | None, int | None, int | None]:
    """
    Heuristically locate the `mem` array inside a module entry.

    Every pointer-aligned offset (up to 0x100 slots) from the first pointer
    stored at the `modules` list head is tried with two candidate element
    sizes. A candidate is accepted when, for each of the first
    MOD_RO_AFTER_INIT element kinds, the element address is readable, its first
    pointer-sized word is non-zero and page-aligned, and the 32-bit value at
    `size_offset` lies strictly between 0 and 0x100000.

    Returns:
        (offset of the array, size of one element, offset of the size field
        inside an element), or (None, None, None) after printing a warning.
    """
    modules = pwndbg.aglib.kernel.modules()
    if modules is None:
        print(M.warn("Could not find modules"))
        return None, None, None
    module = pwndbg.aglib.memory.read_pointer_width(int(modules))
    ptrsize = pwndbg.aglib.arch.ptrsize

    # The kernel release does not change while scanning, so everything derived
    # from it is computed once up front instead of once per probe.
    min_size = 0x10
    size_offset = ptrsize
    if pwndbg.aglib.kernel.krelease() >= (6, 13):
        # https://elixir.bootlin.com/linux/v6.13/source/include/linux/module.h#L368
        # additional fields were added
        min_size += 0x8
        size_offset += ptrsize + 4
    # NOTE(review): the extra 0x38 presumably accounts for an optional
    # tree-lookup node in each element - confirm against the kernel headers.
    candidate_sizes = (min_size, min_size + 0x38)
    # MOD_RO_AFTER_INIT itself is not checked because it might be empty.
    checked_types = range(mod_mem_type.MOD_RO_AFTER_INIT.value)

    for i in range(0x100):
        offset = i * ptrsize
        for module_memory_size in candidate_sizes:
            for mem_type in checked_types:
                mem_ptr = module + offset + mem_type * module_memory_size
                if pwndbg.aglib.memory.peek(mem_ptr) is None:
                    break
                base = pwndbg.aglib.memory.read_pointer_width(mem_ptr)
                if base == 0 or (base & 0xFFF) != 0:
                    break
                size = pwndbg.aglib.memory.u32(mem_ptr + size_offset)
                if not 0 < size < 0x100000:
                    break
            else:
                # no element was rejected: this layout fits
                return offset, module_memory_size, size_offset
    print(M.warn("Could not find module->mem"))
    return None, None, None
@pwndbg.lib.cache.cache_until("stop")
def module_layout_offset() -> Tuple[int | None, int | None]:
    """
    Heuristically locate the layout address/size pair inside a module entry.

    For every pointer-aligned offset (up to 0x100 slots) from the first pointer
    stored at the `modules` list head, the location one pointer past that
    offset is probed. A candidate is accepted when that location is readable,
    the pointer-sized word there is non-zero with its low 12 bits clear, and
    the four consecutive 32-bit values starting at that same location each lie
    strictly between 0 and 0x100000.

    Returns:
        (offset, offset + pointer size) for the first accepted candidate, or
        (None, None) after printing a warning. The `kmod` command reads the
        module address at the first offset and its size at the second.
    """
    list_head = pwndbg.aglib.kernel.modules()
    if list_head is None:
        print(M.warn("Could not find modules"))
        return None, None
    first_entry = pwndbg.aglib.memory.read_pointer_width(int(list_head))
    for slot in range(0x100):  # enough to search through the struct
        addr_offset = slot * pwndbg.aglib.arch.ptrsize
        size_offset = addr_offset + pwndbg.aglib.arch.ptrsize
        probe = first_entry + size_offset
        if pwndbg.aglib.memory.peek(probe) is None:
            continue
        word = pwndbg.aglib.memory.read_pointer_width(probe)
        if word == 0 or (word & 0xFFF) != 0:
            continue
        if all(0 < pwndbg.aglib.memory.u32(probe + 4 * k) < 0x100000 for k in range(4)):
            return addr_offset, size_offset
    print(M.warn("Could not find module->init_layout"))
    return None, None
@pwndbg.lib.cache.cache_until("stop")
def module_kallsyms_offset() -> int | None:
    """
    Heuristically locate the `kallsyms` pointer inside a module entry.

    Every pointer-aligned offset (up to 0x100 slots) from the first pointer
    stored at the `modules` list head is treated as a candidate pointer to a
    kallsyms record. A candidate is accepted when:
      * the candidate pointer is non-zero and readable,
      * word 0 of the record (symtab) is a readable address,
      * word 1 (num_symtab) is non-zero and is NOT a readable address,
      * word 2 (strtab) is a readable address,
      * on kernel releases >= 5.2, word 3 (typetab) is a readable address.

    Returns:
        The offset of the first accepted candidate, or None after printing a
        warning.
    """
    modules = pwndbg.aglib.kernel.modules()
    if modules is None:
        print(M.warn("Could not find modules"))
        # A bare None, matching the other failure path: callers test the
        # result with `is not None`, which a `(None, None)` tuple would pass.
        return None
    module = pwndbg.aglib.memory.read_pointer_width(int(modules))
    ptrsize = pwndbg.aglib.arch.ptrsize
    for i in range(0x100):
        offset = i * ptrsize
        ptr = module + offset
        if pwndbg.aglib.memory.peek(ptr) is None:
            continue
        kallsyms = pwndbg.aglib.memory.read_pointer_width(ptr)
        if pwndbg.aglib.memory.peek(kallsyms) is None or kallsyms == 0:
            continue
        symtab = pwndbg.aglib.memory.read_pointer_width(kallsyms)
        if pwndbg.aglib.memory.peek(symtab) is None:
            continue
        num_symtab = pwndbg.aglib.memory.read_pointer_width(kallsyms + ptrsize)
        # a symbol count must be a plain non-zero number, not a mapped address
        if pwndbg.aglib.memory.peek(num_symtab) is not None or num_symtab == 0:
            continue
        strtab = pwndbg.aglib.memory.read_pointer_width(kallsyms + ptrsize * 2)
        if pwndbg.aglib.memory.peek(strtab) is None:
            continue
        if pwndbg.aglib.kernel.krelease() >= (5, 2):
            typetab = pwndbg.aglib.memory.read_pointer_width(kallsyms + ptrsize * 3)
            if pwndbg.aglib.memory.peek(typetab) is None:
                continue
        return offset
    print(M.warn("Could not find module->kallsyms"))
    return None
@pwndbg.lib.cache.cache_until("stop")
def module_list_with_typeinfo() -> Tuple[pwndbg.dbg_mod.Value, ...]:
    """
    Collect the loaded modules as typed `struct module` values.

    The `modules` list head is interpreted as a `struct list_head` and walked
    with `for_each_entry` over the `list` member of `struct module`.

    Returns:
        A tuple of the entries found, or an empty tuple (after printing a
        warning) when the list head is unavailable.
    """
    modules = pwndbg.aglib.kernel.modules()
    if modules is None:
        print(M.warn("Could not find modules"))
        return ()
    head = pwndbg.aglib.memory.get_typed_pointer_value("struct list_head", modules)
    # each entry is pointing to the start of the module
    return tuple(for_each_entry(head, "struct module", "list"))
@pwndbg.lib.cache.cache_until("stop")
def module_list() -> Tuple[int, ...]:
    """
    Collect the loaded modules as raw addresses, without needing type info.

    Follows the first pointer of each node, starting at the `modules` list
    head, until the walk arrives back at the head.

    Returns:
        A tuple of node addresses, or an empty tuple (after printing a
        warning) when the list head is unavailable.
    """
    modules = pwndbg.aglib.kernel.modules()
    if modules is None:
        print(M.warn("Could not find modules"))
        return ()
    head = int(modules)
    nodes = []
    node = head
    # each entry is pointing to the module->next
    while (node := pwndbg.aglib.memory.read_pointer_width(node)) != head:
        nodes.append(node)
    return tuple(nodes)
def parse_module_kallsyms(kallsyms: int) -> List[Tuple[str, int, str]]:
    """
    Read the symbols described by one module's kallsyms record.

    The record at `kallsyms` is read as consecutive pointer-sized words:
    symtab, num_symtab, strtab and, on kernel releases >= 5.2, typetab.
    Names are taken sequentially from the string table; entries whose name is
    empty are skipped.

    Args:
        kallsyms: address of the kallsyms record.

    Returns:
        A list of (name, address, type character) tuples.
    """
    ptrsize = pwndbg.aglib.arch.ptrsize
    is_64bit = ptrsize == 8
    sizeof_symtab_entry = 24 if is_64bit else 16
    # Offset of the type byte inside a symtab entry when there is no typetab.
    # Kept as its own value: written inline as `a + b + 16 if is_64bit else 8`
    # the conditional swallows the whole sum, so 32-bit targets would read
    # absolute address 8.
    # NOTE(review): the 16/8 values are carried over unchanged - confirm them
    # against the Elf64_Sym / Elf32_Sym layouts.
    legacy_type_offset = 16 if is_64bit else 8

    symtab = pwndbg.aglib.memory.read_pointer_width(kallsyms)
    num_symtab = pwndbg.aglib.memory.read_pointer_width(kallsyms + ptrsize)
    strtab = pwndbg.aglib.memory.read_pointer_width(kallsyms + ptrsize * 2)
    # The kernel release is fixed for the whole parse; decide once.
    typetab = None
    if pwndbg.aglib.kernel.krelease() >= (5, 2):
        typetab = pwndbg.aglib.memory.read_pointer_width(kallsyms + ptrsize * 3)

    result = []
    strtab_offset = 0
    for i in range(num_symtab):
        raw_name = pwndbg.aglib.memory.string(strtab + strtab_offset)
        # advance by the encoded byte length (+1 for the terminator), which
        # can differ from the decoded character count
        strtab_offset += len(raw_name) + 1
        if len(raw_name) == 0:
            continue
        sym_name = raw_name.decode("utf-8")
        entry = symtab + sizeof_symtab_entry * i
        sym_addr = pwndbg.aglib.memory.read_pointer_width(entry + ptrsize)
        if typetab is not None:
            sym_type = chr(pwndbg.aglib.memory.u8(typetab + i))
        else:
            sym_type = chr(pwndbg.aglib.memory.u8(entry + legacy_type_offset))
        result.append((sym_name, sym_addr, sym_type))
    return result
def all_modules_kallsyms() -> List[Tuple[str, int, str]]:
    """
    Gather the kallsyms symbols of every loaded module.

    When type information for `struct module` can be loaded, the typed module
    list is used and modules whose type has no `kallsyms` field are skipped.
    Otherwise, if an offset for the kallsyms pointer can be determined, the raw
    module list is used.

    Returns:
        The concatenated (name, address, type character) tuples; empty when
        neither approach is available.
    """
    symbols: List[Tuple[str, int, str]] = []
    if pwndbg.aglib.typeinfo.load("struct module") is not None:
        for module in module_list_with_typeinfo():
            if not module.type.has_field("kallsyms"):
                continue
            symbols.extend(parse_module_kallsyms(int(module["kallsyms"])))
        return symbols
    if module_kallsyms_offset() is not None:
        for entry in module_list():
            record = pwndbg.aglib.memory.read_pointer_width(entry + module_kallsyms_offset())
            symbols.extend(parse_module_kallsyms(record))
    return symbols

@ -296,7 +296,19 @@ class ArchSymbols:
per_cpu_offset = pwndbg.aglib.symbol.lookup_symbol("__per_cpu_offset") per_cpu_offset = pwndbg.aglib.symbol.lookup_symbol("__per_cpu_offset")
if per_cpu_offset is not None: if per_cpu_offset is not None:
return per_cpu_offset return per_cpu_offset
return self._per_cpu_offset() per_cpu_offset = self._per_cpu_offset()
if per_cpu_offset is None:
return None
return pwndbg.aglib.memory.get_typed_pointer("unsigned long", per_cpu_offset)
def modules(self):
    """
    Resolve the kernel `modules` list head.

    The `modules` symbol is looked up first and returned when the lookup gives
    a truthy result. Otherwise the architecture-specific `_modules()` is
    consulted and its result is passed to `get_typed_pointer` with the type
    "unsigned long". Returns None when `_modules()` returns None.
    """
    symbol = pwndbg.aglib.symbol.lookup_symbol("modules")
    if symbol:
        return symbol
    address = self._modules()
    if address is not None:
        return pwndbg.aglib.memory.get_typed_pointer("unsigned long", address)
    return None
def _node_data(self): def _node_data(self):
raise NotImplementedError() raise NotImplementedError()
@ -307,6 +319,9 @@ class ArchSymbols:
def _per_cpu_offset(self): def _per_cpu_offset(self):
raise NotImplementedError() raise NotImplementedError()
def _modules(self):
raise NotImplementedError()
class x86_64Symbols(ArchSymbols): class x86_64Symbols(ArchSymbols):
# mov reg, [... - 0x...] # mov reg, [... - 0x...]
@ -364,6 +379,10 @@ class x86_64Symbols(ArchSymbols):
return result return result
return self.qword_mov_reg_ripoff(disass) return self.qword_mov_reg_ripoff(disass)
def _modules(self):
disass = self.disass("find_module_all")
return self.qword_mov_reg_ripoff(disass)
class Aarch64Symbols(ArchSymbols): class Aarch64Symbols(ArchSymbols):
# adrp x?, <kernel address> # adrp x?, <kernel address>
@ -406,8 +425,26 @@ class Aarch64Symbols(ArchSymbols):
m = pattern.search(disass) m = pattern.search(disass)
if m is None: if m is None:
return None return None
return sum([int(m.group(i), 16) for i in [2, 3, 4]]) return sum(int(m.group(i), 16) for i in [2, 3, 4])
def _per_cpu_offset(self): def _per_cpu_offset(self):
disass = self.disass("nr_iowait_cpu") disass = self.disass("nr_iowait_cpu")
return self.qword_adrp_add_const(disass) return self.qword_adrp_add_const(disass)
def _modules(self):
disass = self.disass("find_module_all")
# adrp x<num>, 0x....
# ...
# add x<num>, x<num>, #0x...
# ...
# ldr x?, [x<num>, #0x]!...
pattern = re.compile(
r"adrp\s+x(\d+),\s+0x([0-9a-fA-F]+).*?\n"
r".*?add\s+x\1,\s+x\1,\s+#0x([0-9a-fA-F]+).*?\n"
r".*?ldr\s+x\d+,\s+\[x\1,\s+#0x([0-9a-fA-F]+)\]!",
re.DOTALL,
)
m = pattern.search(disass)
if m is None:
return None
return sum(int(m.group(i), 16) for i in [2, 3, 4])

@ -9,10 +9,6 @@ import pwndbg.dbg
from pwndbg import config from pwndbg import config
from pwndbg.commands import CommandCategory from pwndbg.commands import CommandCategory
if pwndbg.dbg.is_gdblib_available():
import gdb
parser = argparse.ArgumentParser(description="Finds the kernel virtual base address.") parser = argparse.ArgumentParser(description="Finds the kernel virtual base address.")
parser.add_argument("-r", "--rebase", action="store_true", help="rebase loaded symbol file") parser.add_argument("-r", "--rebase", action="store_true", help="rebase loaded symbol file")
@ -40,9 +36,6 @@ def kbase(rebase=False) -> None:
symbol_file = pwndbg.aglib.proc.exe symbol_file = pwndbg.aglib.proc.exe
if symbol_file: if symbol_file:
if pwndbg.dbg.is_gdblib_available(): pwndbg.dbg.selected_inferior().add_symbol_file(symbol_file, base)
gdb.execute(f"add-symbol-file {symbol_file} {hex(base)}")
else:
print(M.error("Adding symbol not supported in LLDB yet"))
else: else:
print(M.error("No symbol file is currently loaded")) print(M.error("No symbol file is currently loaded"))

@ -9,53 +9,85 @@ import argparse
from tabulate import tabulate from tabulate import tabulate
import pwndbg
import pwndbg.aglib.kernel.kmod
import pwndbg.color.message as M
import pwndbg.commands import pwndbg.commands
from pwndbg.aglib.kernel.macros import for_each_entry
parser = argparse.ArgumentParser(description="Displays the loaded Linux kernel modules.") parser = argparse.ArgumentParser(description="Displays the loaded Linux kernel modules.")
parser.add_argument( parser.add_argument(
"module_name", nargs="?", type=str, help="A module name substring to filter for" "module_name", nargs="?", type=str, help="A module name substring to filter for"
) )
parser.add_argument("-l", "--load", dest="path", type=str, help="the path of the module to load")
@pwndbg.commands.Command(parser, category=pwndbg.commands.CommandCategory.KERNEL) @pwndbg.commands.Command(parser, category=pwndbg.commands.CommandCategory.KERNEL)
@pwndbg.commands.OnlyWhenQemuKernel @pwndbg.commands.OnlyWhenQemuKernel
@pwndbg.commands.OnlyWhenPagingEnabled @pwndbg.commands.OnlyWhenPagingEnabled
@pwndbg.commands.OnlyWithKernelDebugInfo @pwndbg.commands.OnlyWithKernelDebugSymbols
def kmod(module_name=None) -> None: def kmod(module_name=None, path=None) -> None:
# Look up the address of the `modules` symbol, containing the head of the linked list of kernel modules # Look up the address of the `modules` symbol, containing the head of the linked list of kernel modules
modules_head = pwndbg.aglib.symbol.lookup_symbol_addr("modules") modules_head = pwndbg.aglib.kernel.modules()
if modules_head is None: if modules_head is None:
print( print(
"The modules symbol was not found. This may indicate that the symbol is not available in the current build." "The modules symbol was not found. This may indicate that the symbol is not available in the current build."
) )
return return
print(f"Kernel modules address found at {modules_head:#x}.\n") print(f"Kernel modules address found at {int(modules_head):#x}.\n")
try:
table = []
headers = ["Address", "Name", "Size", "Used by"]
head = pwndbg.aglib.memory.get_typed_pointer_value("struct list_head", modules_head)
table = []
headers = ["Address", "Name", "Size", "Used by"]
if pwndbg.aglib.typeinfo.load("struct module") is not None:
# Iterate through the linked list of modules using for_each_entry # Iterate through the linked list of modules using for_each_entry
for module in for_each_entry(head, "struct module", "list"): for module in pwndbg.aglib.kernel.kmod.module_list_with_typeinfo():
addr = int(module["mem"][0]["base"])
name = pwndbg.aglib.memory.string(int(module["name"].address)).decode( name = pwndbg.aglib.memory.string(int(module["name"].address)).decode(
"utf-8", errors="ignore" "utf-8", errors="ignore"
) )
addr, size = None, None
# Calculate runtime memory footprint by summing sizes of MOD_TEXT, MOD_DATA, MOD_RODATA, MOD_RO_AFTER_INIT, if pwndbg.aglib.kernel.krelease() >= (6, 4):
# which excludes initialization sections that are freed after the module load. See `enum mod_mem_type` in kernel source. addr = int(module["mem"][0]["base"])
size = sum(int(module["mem"][i]["size"]) for i in range(4)) size = sum(
int(module["mem"][i]["size"])
for i in range(pwndbg.aglib.kernel.kmod.mod_mem_type.MOD_MEM_NUM_TYPES.value)
)
else:
addr = int(module["init_layout"]["addr"])
size = module["init_layout"]["size"]
uses = int(module["refcnt"]["counter"]) - 1 uses = int(module["refcnt"]["counter"]) - 1
# If module_name is provided, filter modules by name substring # If module_name is provided, filter modules by name substring
if not module_name or module_name in name: if not module_name or module_name in name:
table.append([f"{addr:#x}", name, size, uses]) table.append([f"{addr:#x}", name, size, uses])
else:
cur = pwndbg.aglib.memory.read_pointer_width(int(modules_head))
name_offset = pwndbg.aglib.kernel.kmod.module_name_offset()
for cur in pwndbg.aglib.kernel.kmod.module_list():
name = pwndbg.aglib.memory.string(cur + name_offset).decode()
if pwndbg.aglib.kernel.krelease() >= (6, 4):
mem_offset, module_memory_size, size_offset = (
pwndbg.aglib.kernel.kmod.module_mem_offset()
)
addr = pwndbg.aglib.memory.read_pointer_width(cur + mem_offset)
size = 0
for i in range(pwndbg.aglib.kernel.kmod.mod_mem_type.MOD_MEM_NUM_TYPES.value):
ptr = cur + mem_offset + module_memory_size * i
size += pwndbg.aglib.memory.u32(ptr + size_offset)
else:
addr_offset, size_offset = pwndbg.aglib.kernel.kmod.module_layout_offset()
addr = pwndbg.aglib.memory.read_pointer_width(cur + addr_offset)
size = pwndbg.aglib.memory.u32(cur + size_offset)
print(tabulate(table, headers=headers, tablefmt="simple")) if not module_name or module_name in name:
except Exception as e: table.append([f"{addr:#x}", name, size, "-"])
print( if path is not None:
f"An error occurred while retrieving kernel modules. It may not be supported by your kernel version or debug symbols: {e}" if len(table) == 1:
) pwndbg.dbg.selected_inferior().add_symbol_file(path, table[0][0])
return
if len(table) > 1:
print(M.warn("Multiple modules detected with the given filter"))
else:
print(M.warn("No modules detected with the given filter."))
return
print(tabulate(table, headers=headers, tablefmt="simple"))

@ -626,6 +626,12 @@ class Process:
""" """
raise NotImplementedError() raise NotImplementedError()
def add_symbol_file(self, path, base):
    """
    Adds the symbol file found at `path`, placed at `base`.

    Not implemented here: always raises NotImplementedError.
    """
    raise NotImplementedError()
class TypeCode(Enum): class TypeCode(Enum):
""" """

@ -976,6 +976,10 @@ class GDBProcess(pwndbg.dbg_mod.Process):
# We're done. # We're done.
break break
@override
def add_symbol_file(self, path, base):
    """
    Runs GDB's `add-symbol-file` command with `path` and `base` interpolated
    into the command line as given.
    """
    command = f"add-symbol-file {path} {base}"
    gdb.execute(command)
class GDBExecutionController(pwndbg.dbg_mod.ExecutionController): class GDBExecutionController(pwndbg.dbg_mod.ExecutionController):
@override @override

@ -49,9 +49,8 @@ def test_command_kdmesg():
def test_command_kmod(): def test_command_kmod():
if not pwndbg.aglib.kernel.has_debug_info(): if not pwndbg.aglib.kernel.has_debug_symbols("find_module_all"):
res = gdb.execute("kmod", to_string=True) res = gdb.execute("kmod", to_string=True)
assert "may only be run when debugging a Linux kernel with debug" in res
return return
res = gdb.execute("kmod", to_string=True) res = gdb.execute("kmod", to_string=True)

Loading…
Cancel
Save