pwndbg/scripts/_gen_command_docs.py

#!/usr/bin/env python
"""
You should use scripts/generate_docs.sh and scripts/verify_docs.sh instead
of using this.

If the PWNDBG_GEN_DOC_JUST_VERIFY environment variable is set, the script
only verifies: it exits with a non-zero exit status if the docs/commands/
files aren't up to date with the sources, and it doesn't modify anything.

If it isn't set, the script fixes up the docs/commands/ files so that they
are up to date with the (argparse) information from the sources.
"""

from __future__ import annotations

# We need to patch shutil.get_terminal_size() because otherwise argparse will output
# .format_usage() based on terminal width which may be different for different users.
# The replacement ignores its `fallback` argument and always reports 80 columns by
# 24 lines. `os` is reached through `shutil.os` because `os` itself is only imported
# further down in this file.
# I tried every other solution, it doesn't work :).
import shutil

shutil.get_terminal_size = lambda fallback=(80, 24): shutil.os.terminal_size((80, 24))

import argparse
import os
import re
import sys
from typing import Dict

from mdutils.mdutils import MdUtils

import pwndbg.commands
from scripts._gen_docs_generic import verify_existence

# The two marker lines that end the auto-generated part of every documentation file.
# update_files() writes them right after the generated markdown and, when regenerating
# an existing file, keeps everything from the first marker onwards untouched.
# Both are compared against whole lines read from the files, hence the trailing "\n".
autogen_end_marker1 = "<!-- END OF AUTOGENERATED PART. Do not modify this line or the line below, they mark the end of the auto-generated part of the file. If you want to extend the documentation in a way which cannot easily be done by adding to the command help description, write below the following line. -->\n"
autogen_end_marker2 = "<!-- ------------\\>8---- ----\\>8---- ----\\>8------------ -->\n"


def inline_code(code):
    """Wrap `code` in backticks so that markdown renders it as inline code."""
    return "`{}`".format(code)


def category_to_folder_name(category) -> str:
    """
    Derive the folder name used for a command category.

    The category is lower-cased and every space and "/" is turned into "_".

    Raises:
        AssertionError: if the resulting name contains anything other than
        alphanumeric characters and underscores.
    """
    to_underscore = str.maketrans({" ": "_", "/": "_"})
    folder = category.lower().translate(to_underscore)

    # Don't allow wacky characters for folder names. If you hit this assert, feel free
    # to extend the translation table above to sanitize the category name.
    for char in folder:
        assert char == "_" or char.isalnum()

    return folder


def extract_sources() -> tuple[Dict[str, argparse.ArgumentParser], Dict[str, list[str]]]:
    """
    Extract the sources.

    Looks at every attribute of every object exposed by `pwndbg.commands` and
    keeps the ones that are `pwndbg.commands.Command` instances.

    Exits with status 4 if a command has no category.

    Returns:
        (A dictionary that maps the filenames of .md files to the corresponding
        command's ArgumentParser objects, A dictionary that maps a category name
        to a list of filenames for commands that belong to the category).
        A filename is listed at most once per category, even if the same command
        is reachable through several objects.
    """
    filename_to_source = {}
    category_to_filename = {}

    # This depends on pwndbg.commands.load_commands() importing every command :)
    # `obj_name` iterates over the names of all modules in pwndbg.commands
    # (among other stuff).
    for obj_name in dir(pwndbg.commands):
        # Get the (potential) module by name.
        mod = getattr(pwndbg.commands, obj_name)

        # Iterate over everything in the module, which includes the command functions.
        for fn_name in dir(mod):
            fn = getattr(mod, fn_name)

            if not isinstance(fn, pwndbg.commands.Command):
                continue
            # This object is a command (an _ArgparsedCommand object)!

            category = fn.category
            parser = fn.parser

            if category is None:
                # Should never be reached since ArgparsedCommand.__init__() will throw the error first.
                print(
                    f"ERROR: Command function {fn_name} in {obj_name} does not have an assigned category."
                )
                sys.exit(4)

            # Check the parser before it is dereferenced below.
            assert parser

            cat_folder = category_to_folder_name(category.value)
            filename = (
                base_path + f"{cat_folder}/{parser.prog}.md"
            )  # Should be using join but whatever.

            filename_to_source[filename] = parser

            # The same command object can be found more than once (e.g. when a
            # module imports it from another one); list its file only once so the
            # index doesn't get duplicate entries.
            filenames = category_to_filename.setdefault(category.value, [])
            if filename not in filenames:
                filenames.append(filename)

    assert filename_to_source
    assert category_to_filename
    return filename_to_source, category_to_filename


def convert_to_markdown(filename: str, parser: argparse.ArgumentParser) -> str:
    """
    Render the documentation page of a single command.

    The page consists of the command name, its description, its usage string
    and one table each for the positional and the optional arguments. A table
    is left out when the parser has no arguments of that kind.

    Exits with status 5 if the parser has no description.

    Returns:
        The markdown text of the page, preceded by the autogeneration warning.
    """
    command_name = parser.prog  # parser.prog is the command name after all processing
    command_description = parser.description

    if not command_description:
        print(f"ERROR: Command {command_name} ({filename}) does not have a description.")
        sys.exit(5)

    page = MdUtils(filename)

    page.new_header(level=1, title=command_name)
    # The description is known to be non-empty at this point.
    page.new_header(level=2, title="Description")
    page.new_paragraph(command_description)

    page.new_header(level=2, title="Usage:")
    page.insert_code(parser.format_usage(), language="bash")

    # ids of the actions which already have a table row.
    documented = set()

    def as_cell(text):
        # A missing text is shown as `None`; newlines are flattened to spaces.
        if text is None:
            return inline_code(text)
        return text.replace("\n", " ")

    # Process positional arguments
    positional_actions = parser._positionals._group_actions
    if positional_actions:
        cells = ["Positional Argument", "Help"]
        for action in positional_actions:
            if id(action) in documented:
                continue
            documented.add(id(action))
            cells += [inline_code(action.dest), action.help]

        page.new_header(level=2, title="Positional Arguments")
        cells = [as_cell(cell) for cell in cells]
        page.new_table(columns=2, rows=len(cells) // 2, text=cells, text_align="left")

    # Process optional arguments
    option_actions = parser._option_string_actions
    if option_actions:
        cells = ["Short", "Long", "Default", "Help"]
        # The mapping is keyed by option string, so an action with several
        # option strings is seen several times; `documented` filters the repeats.
        for action in option_actions.values():
            if id(action) in documented:
                continue
            documented.add(id(action))

            short_flag = long_flag = default_cell = ""
            for opt in action.option_strings:
                if len(opt) > 1 and opt[1] in parser.prefix_chars:
                    # --, long option
                    long_flag = inline_code(opt)
                elif len(opt) > 0 and opt[0] in parser.prefix_chars:
                    # short opt
                    short_flag = inline_code(opt)

            # No default is shown for boolean defaults and for the help/version actions.
            shows_default = not isinstance(action.default, bool) and not isinstance(
                action, (argparse._VersionAction, argparse._HelpAction)
            )
            if shows_default:
                value = action.default
                default_cell = inline_code(value if isinstance(value, str) else repr(value))

            cells += [short_flag, long_flag, default_cell, action.help]

        page.new_header(level=2, title="Optional Arguments")
        cells = [as_cell(cell) for cell in cells]
        page.new_table(columns=4, rows=len(cells) // 4, text=cells, text_align="left")

    autogen_warning = "<!-- THIS PART OF THIS FILE IS AUTOGENERATED. DO NOT MODIFY IT. See scripts/generate_docs.sh -->"

    return autogen_warning + "\n" + page.get_md_text()


def convert_all_to_markdown(
    filename_to_parser: Dict[str, argparse.ArgumentParser],
) -> Dict[str, str]:
    """
    Render the documentation page for every parser.

    Returns:
        A dictionary that maps each filename of the input to the markdown
        text produced by convert_to_markdown() for it.
    """
    return {
        filename: convert_to_markdown(filename, parser)
        for filename, parser in filename_to_parser.items()
    }


def generate_index(
    filename_to_parser: Dict[str, argparse.ArgumentParser],
    category_to_filename: Dict[str, list[str]],
) -> str:
    """
    Build the index page that links to every command page.

    Categories are listed in sorted order, and so are the files within each
    category. Every entry links to the command's page and shows the first
    line of the command's description.

    Returns:
        The markdown text of the index, preceded by the autogeneration warning.
    """
    index = MdUtils("docs/commands/index.md")
    index.new_header(level=1, title="Commands")

    def entry_for(filename, category):
        # One list item: link to the command page plus the short description.
        parser = filename_to_parser[filename]
        command = parser.prog
        first_line = parser.description.strip().splitlines()[0]
        directory = category_to_folder_name(category)
        return f" [{command}]({directory}/{command}.md) - {first_line}"

    for category in sorted(category_to_filename):
        index.new_header(level=2, title=f"{category}")
        entries = [
            entry_for(filename, category) for filename in sorted(category_to_filename[category])
        ]
        index.new_list(items=entries)

    index_autogen_warning = (
        "<!-- THIS FILE IS AUTOGENERATED. DO NOT EDIT IT. See ~/scripts/generate_docs.sh -->\n"
    )
    return index_autogen_warning + index.get_md_text()


def verify_files(filename_to_markdown: Dict[str, str]) -> str | None:
    """
    Verify all the markdown files are up to date with the sources.

    For every file, the first lines must be equal to the autogenerated markdown,
    and the two autogen end markers must follow it. Anything after the markers
    is not looked at. Checking stops at the first problem found.

    Line numbers in the returned messages are 0-based indices into the file.

    Returns:
        None if everything is up-to-date.
        A string containing the error message if something is not.
    """

    for filename, markdown in filename_to_markdown.items():
        print(f"Checking {filename} ..")

        if not os.path.exists(filename):
            return f"File {filename} does not exist."

        with open(filename, "r") as file:
            file_data = file.readlines()

        expected = [x + "\n" for x in markdown.splitlines()]
        mkdlen = len(expected)

        # Expected layout: `mkdlen` generated lines, then one line which is not
        # checked (update_files() writes an extra "\n" between the generated text
        # and the markers), then the two marker lines.
        marker1_idx = mkdlen + 1
        marker2_idx = mkdlen + 2

        if len(file_data) < (mkdlen + 3):
            return (
                f"File {filename} is too short. Expected {mkdlen + 3} lines, got {len(file_data)}."
            )

        if not (
            file_data[marker1_idx] == autogen_end_marker1
            and file_data[marker2_idx] == autogen_end_marker2
        ):
            # Report the very lines that were compared above.
            return f'Expected autogenerated end markers in {filename} @ lines {marker1_idx} and {marker2_idx}. Instead found "{file_data[marker1_idx]}" and "{file_data[marker2_idx]}".'

        # file_data is known to be longer than expected, so zip() covers all of expected.
        for i, (actual, wanted) in enumerate(zip(file_data, expected)):
            if actual != wanted:
                return f'File {filename} differs from autogenerated on line {i}.\nFile: "{actual}".\nAutogenerated: "{wanted}".'

    return None


def update_files(filename_to_markdown: Dict[str, str]):
    """
    Bring the files up to date with the sources, creating files and
    directories which don't exist yet.

    A new file gets the generated markdown followed by the two autogen end
    markers. In an existing file only the part above the markers is replaced;
    the markers and everything below them are kept as they are.

    Exits with status 6 if the second marker is found without the first one
    right above it, and with status 7 if the second marker isn't found at all.
    """
    for path, generated in filename_to_markdown.items():
        print(f"Updating {path} ..")

        if not os.path.exists(path):
            # Simple case, just create the file and write it.
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "w") as file:
                file.write(generated + "\n" + autogen_end_marker1 + autogen_end_marker2)
            continue

        # Need to find the marker in the file, and edit only above that part.
        with open(path, "r+") as file:
            lines = file.readlines()

            # Walk backwards to the last occurrence of the second marker.
            second = len(lines) - 1
            while second >= 0 and lines[second] != autogen_end_marker2:
                second -= 1

            if second < 0:
                print(
                    f"ERROR: In file {path} couldn't find autogen marker ({autogen_end_marker2})."
                )
                sys.exit(7)

            if second == 0 or lines[second - 1] != autogen_end_marker1:
                print(
                    f"ERROR: In file {path} found the second autogen marker, but couldn't find the first ({autogen_end_marker1})."
                )
                sys.exit(6)

            # Includes the autogen markers
            kept_tail = "".join(lines[second - 1 :])

            file.seek(0)
            file.write(generated + "\n" + kept_tail)
            file.truncate()


def file_has_handwritten(filename: str) -> bool:
    """
    Returns if a file has a hand-written part, i.e. if there is anything
    but whitespace after the autogen end markers.

    Also returns true if the autogen markers are malformed or
    don't exist.
    """
    # The file is only inspected, so open it read-only; this also works for
    # files which are not writable.
    with open(filename, "r") as file:
        file_data = file.readlines()

    # Search backwards for the last occurrence of the second marker.
    for i in reversed(range(len(file_data))):
        if file_data[i] != autogen_end_marker2:
            continue

        if i == 0 or file_data[i - 1] != autogen_end_marker1:
            # The first marker is not right above the second one.
            return True

        # Only whitespace (or nothing at all) after the markers doesn't
        # count as hand-written; we won't complain about that.
        return bool("".join(file_data[i + 1 :]).strip())

    # No marker found.
    return True


base_path = "docs/commands/"  # Must have trailing slash.

# ==== Start ====
# Driver: generate the markdown for every command plus the index, then either
# verify the files on disk against it or update them.
#
# Exit statuses used below: 1 = arguments were passed, 2 = files missing/extra
# (verify mode), 3 = file contents differ (verify mode), 18 = extra files with a
# hand-written part (or malformed markers) were left in place (update mode).

if len(sys.argv) > 1:
    print("This script doesn't accept any arguments.")
    print("See top of the file for usage.")
    sys.exit(1)

# Any non-empty value of the environment variable selects verify-only mode.
just_verify = bool(os.getenv("PWNDBG_GEN_DOC_JUST_VERIFY"))

print("\n==== Command Documentation ====")

extracted, cat_to_names = extract_sources()
markdowned = convert_all_to_markdown(extracted)
markdowned[base_path + "index.md"] = generate_index(extracted, cat_to_names)

if just_verify:
    print("Checking if all files are in place..")
    missing, extra = verify_existence(markdowned.keys(), base_path)
    if missing or extra:
        print("To fix this please run ./scripts/generate_docs.sh.")
        sys.exit(2)
    print("Every file is where it should be!")

    print("Verifying contents...")
    err = verify_files(markdowned)
    if err:
        print("VERIFICATION FAILED. The files differ from what would be auto-generated.")
        print("Error:", err)
        print("Please run ./scripts/generate_docs.sh from project root and commit the changes.")
        sys.exit(3)

    print("Verification successful!")
else:
    print("Updating files...")
    update_files(markdowned)
    print("Update successful.")

    missing, extra = verify_existence(markdowned.keys(), base_path)
    # update_files() has just written every expected file.
    assert not missing, "Some files are missing, which should be impossible."
    if extra:
        print("Take care! Deleting these extra files:")
        not_deleted = []
        for e in extra:
            # Never throw away hand-written documentation automatically.
            if file_has_handwritten(e):
                not_deleted.append(e)
            else:
                print(e)
                os.remove(e)

        if not_deleted:
            print("\nSome files were not auto-deleted as they contain a hand-written part")
            print("(or the markers for the hand-written part are malformed). Please delete")
            print("them manually, probably after transferring the hand-written part to a")
            print("new file.")
            print(f"Files ({len(not_deleted)}):")
            print("\n".join(not_deleted))
            sys.exit(18)
        else:
            print("Deleted successfully.")