Skip to content

Commit

Permalink
Merge pull request #25 from plasma-umass/llm-utils
Browse files Browse the repository at this point in the history
Factor out some functions from llm-utils repository
  • Loading branch information
khlevin authored Dec 22, 2023
2 parents 1fe66f8 + 7dd8896 commit 53e72c3
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 112 deletions.
160 changes: 160 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ version = "0.2.2"
authors = [
{ name="Emery Berger", email="[email protected]" },
]
dependencies = ["openai>=0.27.0", "tiktoken>=0.4.0"]
dependencies = ["llm_utils==0.2.2", "openai>=0.27.0"]
description = "ChatDBG."
readme = "README.md"
requires-python = ">=3.7"
Expand Down
116 changes: 8 additions & 108 deletions src/chatdbg/chatdbg_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import os
import sys
import textwrap
import tiktoken
import openai

from llm_utils import llm_utils


def get_model() -> str:
all_models = ["gpt-4", "gpt-3.5-turbo"]
Expand All @@ -22,53 +21,6 @@ def get_model() -> str:
return model


def word_wrap_except_code_blocks(text: str) -> str:
    """
    Wrap plain-text paragraphs while leaving fenced code untouched.

    The input is split into paragraphs on blank lines.  Text that falls
    inside a fenced code block (delimited by ``` markers) is passed
    through verbatim; everything else is re-flowed with textwrap.fill().
    Fence markers are re-emitted as their own paragraphs.

    Args:
        text: The text to wrap.
    Returns:
        The wrapped text.
    """
    pieces = []
    inside_fence = False
    for chunk in text.split("\n\n"):
        if "```" not in chunk:
            # Whole paragraph with no fence marker: wrap it unless we are
            # currently inside a code block.
            pieces.append(chunk if inside_fence else textwrap.fill(chunk))
            continue
        # Paragraph contains one or more fence markers: handle each
        # segment between markers separately.
        segments = chunk.split("```")
        last = len(segments) - 1
        for idx, segment in enumerate(segments):
            pieces.append(segment if inside_fence else textwrap.fill(segment))
            if idx != last:
                # Re-emit the fence marker and flip the in/out state.
                pieces.append("```")
                inside_fence = not inside_fence
    return "\n\n".join(pieces)


def read_lines_width() -> int:
    """Return the fixed line-count window (10) used when reading source lines.

    NOTE(review): presumably the number of context lines shown around a
    point of interest by read_lines-style helpers — confirm against callers.
    """
    return 10

Expand Down Expand Up @@ -102,16 +54,7 @@ def read_lines(file_path: str, start_line: int, end_line: int) -> str:
return "\n".join(lines[start_line:end_line])


def num_tokens_from_string(string: str, model: str) -> int:
    """Return how many tokens *string* encodes to under *model*'s tokenizer."""
    # Look up the model-specific encoding once, then count encoded tokens.
    encoder = tiktoken.encoding_for_model(model)
    return len(encoder.encode(string))


def explain(source_code: str, traceback: str, exception: str, really_run=True) -> None:
import httpx

user_prompt = "Explain what the root cause of this error is, given the following source code context for each stack frame and a traceback, and propose a fix. In your response, never refer to the frames given below (as in, 'frame 0'). Instead, always refer only to specific lines and filenames of source code.\n"
user_prompt += "\n"
user_prompt += "Source code for each stack frame:\n```\n"
Expand All @@ -124,8 +67,8 @@ def explain(source_code: str, traceback: str, exception: str, really_run=True) -
if not model:
return

input_tokens = num_tokens_from_string(user_prompt, model)
input_tokens = llm_utils.count_tokens(model, user_prompt)

if not really_run:
print(user_prompt)
print(f"Total input tokens: {input_tokens}")
Expand All @@ -140,53 +83,10 @@ def explain(source_code: str, traceback: str, exception: str, really_run=True) -
text = completion.choices[0].message.content
input_tokens = completion.usage.prompt_tokens
output_tokens = completion.usage.completion_tokens
context_window = "8K" if model == "gpt-4" else "4K" # FIXME: true as of Oct 3, 2023
cost = calculate_cost(input_tokens, output_tokens, model, context_window)
cost = llm_utils.calculate_cost(input_tokens, output_tokens, model)
text += f"\n(Total cost: approximately ${cost:.2f} USD.)"
print(word_wrap_except_code_blocks(text))
print(llm_utils.word_wrap_except_code_blocks(text))
except openai.error.AuthenticationError:
print(
"You need a valid OpenAI key to use ChatDBG. You can get a key here: https://openai.com/api/"
)
print("You need a valid OpenAI key to use ChatDBG.")
print("You can get a key here: https://platform.openai.com/api-keys")
print("Set the environment variable OPENAI_API_KEY to your key value.")


def calculate_cost(num_input_tokens, num_output_tokens, model_type, context_size):
    """
    Estimate the USD cost of an OpenAI API request.

    Args:
        num_input_tokens (int): Number of prompt (input) tokens.
        num_output_tokens (int): Number of completion (output) tokens.
        model_type (str): GPT model name (e.g. "gpt-4", "gpt-3.5-turbo").
        context_size (str): Context window size ("8K"/"32K" for gpt-4,
            "4K"/"16K" for gpt-3.5-turbo).

    Returns:
        float: The cost of processing the request, in USD.

    Raises:
        ValueError: If model_type or context_size is not recognized.
    """
    # Prices are per 1000 tokens, taken from OpenAI's published pricing
    # (https://openai.com/pricing) as of Oct 3 2023.
    PRICING = {
        "gpt-4": {
            "8K": {"input": 0.03, "output": 0.06},
            "32K": {"input": 0.06, "output": 0.12}
        },
        "gpt-3.5-turbo": {
            "4K": {"input": 0.0015, "output": 0.002},
            "16K": {"input": 0.003, "output": 0.004}
        }
    }

    # Resolve the per-1K-token rates; None means either the model or the
    # context size was unknown.
    rates = PRICING.get(model_type, {}).get(str(context_size))
    if rates is None:
        raise ValueError(f"Invalid model_type or context_size. Choose from {', '.join(PRICING.keys())} and respective context sizes.")

    # Convert to per-token rates, then charge input and output separately.
    in_rate = rates["input"] / 1000
    out_rate = rates["output"] / 1000
    return num_input_tokens * in_rate + num_output_tokens * out_rate
6 changes: 3 additions & 3 deletions src/chatdbg/chatdbg_why.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import sys
import textwrap

import chatdbg_utils
import openai

from llm_utils import llm_utils


def why(self, arg):
user_prompt = "Explain what the root cause of this error is, given the following source code and traceback, and generate code that fixes the error."
Expand Down Expand Up @@ -74,7 +74,7 @@ def why(self, arg):
messages=[{"role": "user", "content": user_prompt}],
)
text = completion.choices[0].message.content
print(chatdbg_utils.word_wrap_except_code_blocks(text))
print(llm_utils.word_wrap_except_code_blocks(text))
except openai.error.AuthenticationError:
print(
"You need a valid OpenAI key to use ChatDBG. You can get a key here: https://openai.com/api/"
Expand Down

0 comments on commit 53e72c3

Please sign in to comment.