GPT-3 Command for Pwndbg (#1589)

* ai plugin

* ai plugin ready to ship

* ai plugin ready to use

* textwrap on the ai's answer

* linted ai.py

* relaxed openai version requirement

* added pandas to requirements

* removed dependency on openai module, using requests instead

* removed dependency on openai module, using requests instead

* incorporating suggestions on PR

* added types requests and bumped requests version up to the version i'm using here

* lowering version req on requests for ubuntu 18 compat

* removed some 'if True' kludges that i was using to debug things at one point
Olivia Lucca Fraser committed via GitHub (commit 78da129e50, parent 449070557d)

@@ -0,0 +1,50 @@
## Command ai ##
```
usage: ai [-h] [-M MODEL] [-t TEMPERATURE] [-m MAX_TOKENS] [-v] [-c COMMAND] <QUESTION>
```
| Positional Argument | Info |
|---------------------|------|
| QUESTION            | The question you want to ask GPT-3 about the current context or command output. |

| Optional Argument | Info |
|-------------------|------|
| -h | show a help message |
| -M MODEL | specify which language model GPT-3 should use (default: text-davinci-003) |
| -t TEMPERATURE | set the temperature for the response, between 0.0 and 2.0, with higher temperatures provoking more 'adventurous' responses |
| -m MAX\_TOKENS | set the size of the response in token count, but note that there is a limit of 4096 tokens for the prompt and response combined, and a token is about 3 characters on average |
| -v | verbose mode -- show the prompt as well as the response |
| -c COMMAND | instead of asking about the context, run a gdb command and ask about its output |

If the `OPENAI_API_KEY` environment variable is set to a valid OpenAI API key
(or the `ai-openai-api-key` config parameter is set in your GDB init file),
then the `ai` command can be used to query the GPT-3 large language model for
insights into the current debugging context. The register state, the stack,
and the nearby assembly instructions will be made visible to the model, along
with the nearby source code, if the binary was compiled with debugging
information. The command talks to the API over HTTPS using `requests`, so the
[`openai`](https://github.com/openai/openai-python) Python module itself is
not required.
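
The key can also be stored persistently in your GDB init file rather than in
the environment. A minimal sketch (the key shown is a placeholder, not a real
credential):
```
# in ~/.gdbinit, after pwndbg has been sourced
set ai-openai-api-key sk-XXXXXXXXXXXXXXXXXXXX
```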
### Examples ###
```
pwndbg> ai what was the name of the function most recently called?
strcmp
pwndbg> ai how do you know this?
The assembly code shows that the function call 0x7ffff7fea240 <strcmp> was made just before the current instruction at 0x7ffff7fce2a7 <check_match+103>.
pwndbg> ai what will the next two instructions do to the eax and ecx registers?
The next two instructions will move the values stored in the esi and edi registers into the eax and ecx registers, respectively.
pwndbg> ai say that again but as a limerick
The eax and ecx registers will fill
With the values stored in esi and edi still
The instructions will move
Their values to improve
And the registers will have a new thrill
```

@@ -149,6 +149,7 @@ nav:
- up: commands/misc/up.md
- down: commands/misc/down.md
- r2: commands/misc/r2.md
- ai: commands/misc/ai.md
- Arm: - Arm:
- cpsr: commands/arm/cpsr.md

@@ -597,6 +597,7 @@ def HexOrAddressExpr(s):
def load_commands() -> None:
    # pylint: disable=import-outside-toplevel
    import pwndbg.commands.ai
    import pwndbg.commands.argv
    import pwndbg.commands.aslr
    import pwndbg.commands.attachp

@@ -0,0 +1,267 @@
"""
This command sends information on the current debugging context to OpenAI's
GPT-3 large language model and asks it a question supplied by the user. It then
displays GPT-3's response to that question to the user.
"""
import argparse
import json
import os
import re
import textwrap

import gdb
import requests

import pwndbg
import pwndbg.color.message as M
import pwndbg.commands
from pwndbg.commands import CommandCategory
from pwndbg.commands import context
from pwndbg.gdblib import config
from pwndbg.gdblib import regs as REGS
config.add_param("ai-openai-api-key", "", "OpenAI API key")
try:
config.ai_openai_api_key = os.environ["OPENAI_API_KEY"]
except KeyError:
pass
config.add_param(
"ai-history-size",
3,
"Maximum number of successive questions and answers to maintain in the prompt for the ai command.",
)
config.add_param(
"ai-stack-depth", 16, "Rows of stack context to include in the prompt for the ai command."
)
last_question = []
last_answer = []
last_pc = None
last_command = None
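# When dummy mode is on (see set_dummy_mode below, used by the unit tests),
# query_openai() returns a canned response instead of calling the API.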
dummy = False
def set_dummy_mode(d=True):
global dummy
dummy = d
return
def build_prompt(question, command=None):
if command is not None:
return build_prompt_from_command(question, command)
decompile = False
## First, get the current GDB context
## Let's begin with the assembly near the current instruction
try:
asm_rows = pwndbg.gdblib.nearpc.nearpc(emulate=True, lines=16)
asm = "\n".join(asm_rows)
except Exception as e:
print(M.error(f"Error: {e}"))
asm = gdb.execute("x/16i $pc", to_string=True)
## Next, let's get the registers
regs_rows = context.get_regs()
regs = "\n".join(regs_rows)
flags = None
try:
flags = gdb.execute("info registers eflags", to_string=True) # arch neutral would be nice
except Exception:
pass
if flags:
        # just grab what's between the square brackets
try:
flags = re.search(r"\[(.*)\]", flags).group(1)
except Exception:
pass
## Finally, let's get the stack
stack_rows = pwndbg.commands.telescope.telescope(
REGS.sp, to_string=True, count=config.ai_stack_depth
)
stack = "\n".join(stack_rows)
## and the backtrace
trace = gdb.execute("bt", to_string=True)
    ## the function arguments, if available
    try:
        args = gdb.execute("info args", to_string=True)
    except Exception:
        args = None
    ## and the local variables, if available
    try:
        local_vars = gdb.execute("info locals", to_string=True)
    except Exception:
        local_vars = None
    ## and source information, if available
    try:
        source = gdb.execute("list", to_string=True)
    except Exception:
        source = ""
    if len(source.split("\n")) < 3:
        ## too little source available; fall back to Ghidra decompilation
        try:
            source = pwndbg.ghidra.decompile()
            decompile = True
        except Exception:
            pass
## Now, let's build the prompt
prompt = "Consider the following context in the GDB debugger:\n"
if asm:
prompt += f"""These are the next assembly instructions to be executed:
```
{asm}
```
"""
if regs:
prompt += f"""Here are the registers, '*' indicates a recent change:
```
{regs}
```
"""
if flags:
prompt += f"""The flags {flags} are set.\n\n"""
if stack:
prompt += f"""Here is the stack:
```
{stack}
```
"""
if trace:
prompt += f"""Here is the backtrace:
```
{trace}
```
"""
if args and "No symbol table info available" not in args:
prompt += f"""Here are the function arguments:
```
{args}
```
"""
if local_vars and "No symbol table info available" not in local_vars:
prompt += f"""Here are the local variables:
```
{local_vars}
```
"""
if source:
prompt += f"""Here is the {'decompiled ' if decompile else ''}source code near the current instruction:
```
{source}
```
"""
return finish_prompt(prompt, question)
def build_prompt_from_command(question, command):
prompt = (
f"""Running the command `{command}` in the GDB debugger yields the following output:\n"""
)
output = gdb.execute(command, to_string=True)
print(output)
prompt += f"""\n```\n{output}\n```\n\n"""
return finish_prompt(prompt, question)
def strip_colors(text):
    ## remove any ANSI color escape codes from the text
    return re.sub(r"\x1b[^m]*m", "", text)
def finish_prompt(prompt, question):
    ## Append the recent question/answer history to the prompt. The caller
    ## clears this history whenever the debugging context changes, and the
    ## ai-history-size parameter bounds its length, since prompts are subject
    ## to a token limit.
    for (q, a) in zip(last_question, last_answer):
        prompt += f"""Question: {q}\n\nAnswer: {a}\n\n"""
prompt += f"""Question: {question}
Answer: """
prompt = strip_colors(prompt)
return prompt
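
# Roughly, the finished prompt is laid out like this (ANSI colors stripped):
#
#   Consider the following context in the GDB debugger:
#   These are the next assembly instructions to be executed:
#   <asm>
#   Here are the registers, '*' indicates a recent change:
#   <regs>
#   ...
#   Question: <earlier question>
#   Answer: <earlier answer>
#   Question: <current question>
#   Answer: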
def query_openai(prompt, model="text-davinci-003", max_tokens=100, temperature=0.0):
if dummy:
return f"""This is a dummy response for unit testing purposes.\nmodel = {model}, max_tokens = {max_tokens}, temperature = {temperature}\n\nPrompt:\n\n{prompt}"""
data = {"model": model, "max_tokens": max_tokens, "prompt": prompt, "temperature": temperature}
host = "api.openai.com"
path = "/v1/completions"
url = f"https://{host}{path}"
    try:
        r = requests.post(
            url,
            data=json.dumps(data),
            headers={
                "Content-Type": "application/json",
                # the API expects a bearer token in the Authorization header;
                # a basic-auth (user, password) tuple will not authenticate
                "Authorization": f"Bearer {config.ai_openai_api_key}",
            },
        )
        res = r.json()
        if "error" in res:
            print(M.error(f"OpenAI API error: {res['error']}"))
            return None
        return res["choices"][0]["text"]
except Exception as e:
print(M.error(f"Error sending query to OpenAI: {e}"))
return None
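
# For reference, the completions endpoint answers with JSON shaped roughly like
#   {"id": ..., "choices": [{"text": "<completion>", ...}], ...}
# and only the first choice's text is used above.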
parser = argparse.ArgumentParser(
description="Ask GPT-3 a question about the current debugging context."
)
parser.add_argument("question", nargs="+", type=str, help="The question to ask.")
parser.add_argument(
"-M", "--model", default="text-davinci-003", type=str, help="The OpenAI model to use."
)
parser.add_argument("-t", "--temperature", default=0.5, type=float, help="The temperature to use.")
parser.add_argument(
"-m", "--max-tokens", default=128, type=int, help="The maximum number of tokens to generate."
)
parser.add_argument("-v", "--verbose", action="store_true", help="Print the prompt and response.")
parser.add_argument(
"-c",
"--command",
type=str,
default=None,
help="Run a command in the GDB debugger and ask a question about the output.",
)
@pwndbg.commands.OnlyWhenRunning
@pwndbg.commands.ArgparsedCommand(parser, command_name="ai", category=CommandCategory.INTEGRATIONS)
def ai(question, model, temperature, max_tokens, verbose, command=None) -> None:
global last_question, last_answer, last_pc, last_command
    if not config.ai_openai_api_key:
        print(
            M.error(
                "Please set the ai-openai-api-key config parameter in your GDB init file, or set the OPENAI_API_KEY environment variable"
            )
        )
        return
question = " ".join(question).strip()
current_pc = gdb.execute("info reg $pc", to_string=True)
if current_pc == last_pc and command is None:
command = last_command
else:
last_command = command
if last_pc != current_pc or last_command != command:
last_question.clear()
last_answer.clear()
prompt = build_prompt(question, command)
if verbose:
print(M.notice(f"Sending this prompt to OpenAI:\n\n{prompt}"))
    res = query_openai(prompt, model=model, max_tokens=max_tokens, temperature=temperature)
    if res is None:
        ## query_openai has already reported the error
        return
    res = res.strip()
    last_question.append(question)
    last_answer.append(res)
last_pc = current_pc
if len(last_question) > config.ai_history_size:
last_question.pop(0)
last_answer.pop(0)
    try:
        term_width = os.get_terminal_size().columns
    except OSError:
        ## not attached to a terminal (e.g. output is redirected)
        term_width = 80
    answer = textwrap.fill(res, term_width)
    print(M.success(answer))
return

@@ -10,3 +10,5 @@ typing-extensions==4.3.0; python_version >= '3.7'
typing-extensions==4.1.1; python_version < '3.7'
unicorn==2.0.1.post1; python_version >= '3.7'
unicorn==2.0.0rc7; python_version < '3.7'
requests>=2.20.0
types-requests>=2.20.0
