From 445e2462311d08f951d89decfa5577827bcd358d Mon Sep 17 00:00:00 2001 From: Maarten Grootendorst Date: Wed, 13 Dec 2023 07:24:44 +0100 Subject: [PATCH] Support for OpenAI >= 1 (#189) * Now only support for openai >= 1.0 * Use the `client` parameter to access the API (see docstrings) --- README.md | 8 ++++---- docs/changelog.md | 20 ++++++++++++++++++++ docs/guides/keyllm.md | 22 ++++++++++------------ docs/guides/llms.md | 8 ++++---- keybert/_llm.py | 4 ++-- keybert/llm/_openai.py | 37 +++++++++++++++++++------------------ 6 files changed, 59 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index adf9fa92..3e4e6a03 100644 --- a/README.md +++ b/README.md @@ -241,8 +241,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -265,8 +265,8 @@ model = SentenceTransformer('all-MiniLM-L6-v2') embeddings = model.encode(MY_DOCUMENTS, convert_to_tensor=True) # Create your LLM -openai.api_key = "sk-..." 
-llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) diff --git a/docs/changelog.md b/docs/changelog.md index 7b673507..6dd594ed 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,6 +3,26 @@ hide: - navigation --- +## **Version 0.8.3** +*Release date: 29 November, 2023* + +* Fix support for openai>=1 + +You can now use it as follows: + +```python +import openai +from keybert.llm import OpenAI +from keybert import KeyLLM + +# Create your LLM +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) + +# Load it in KeyLLM +kw_model = KeyLLM(llm) +``` + ## **Version 0.8.2** *Release date: 29 September, 2023* diff --git a/docs/guides/keyllm.md b/docs/guides/keyllm.md index 1f7496f7..f1913fc4 100644 --- a/docs/guides/keyllm.md +++ b/docs/guides/keyllm.md @@ -47,8 +47,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -85,8 +85,6 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." - prompt = """ I have the following document: [DOCUMENT] @@ -96,7 +94,8 @@ Make sure to only extract keywords that appear in the text. Use the following format separated by commas: """ -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -129,8 +128,6 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." 
- prompt = """ I have the following document: [DOCUMENT] @@ -143,7 +140,8 @@ Based on the information above, improve the candidate keywords to best describe Use the following format separated by commas: """ -llm = OpenAI(model="gpt-3.5-turbo", prompt=prompt, chat=True) +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client,model="gpt-3.5-turbo", prompt=prompt, chat=True) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -199,8 +197,8 @@ model = SentenceTransformer('all-MiniLM-L6-v2') embeddings = model.encode(documents, convert_to_tensor=True) # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -245,8 +243,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM, KeyBERT # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyBERT(llm=llm) diff --git a/docs/guides/llms.md b/docs/guides/llms.md index b57cf7d4..10e65cab 100644 --- a/docs/guides/llms.md +++ b/docs/guides/llms.md @@ -19,8 +19,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your OpenAI LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -37,8 +37,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI(model="gpt-3.5-turbo", chat=True) +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client, model="gpt-3.5-turbo", chat=True) # Load it in KeyLLM kw_model = KeyLLM(llm) diff --git a/keybert/_llm.py b/keybert/_llm.py index db04c609..f3b04fab 100644 --- a/keybert/_llm.py +++ b/keybert/_llm.py @@ -59,8 +59,8 @@ def extract_keywords( from keybert import KeyLLM # Create your LLM - openai.api_key = "sk-..." 
- llm = OpenAI() + client = openai.OpenAI(api_key=MY_API_KEY) + llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) diff --git a/keybert/llm/_openai.py b/keybert/llm/_openai.py index 178abee6..5c8c078c 100644 --- a/keybert/llm/_openai.py +++ b/keybert/llm/_openai.py @@ -50,6 +50,7 @@ class OpenAI(BaseLLM): keywords are comma-separated. Arguments: + client: A `openai.OpenAI` client model: Model to use within OpenAI, defaults to `"text-ada-001"`. NOTE: If a `gpt-3.5-turbo` model is used, make sure to set `chat` to True. @@ -85,8 +86,8 @@ class OpenAI(BaseLLM): from keybert import KeyLLM # Create your LLM - openai.api_key = "sk-..." - llm = OpenAI() + client = openai.OpenAI(api_key=MY_API_KEY) + llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -100,16 +101,17 @@ class OpenAI(BaseLLM): ```python prompt = "I have the following document: [DOCUMENT] \nThis document contains the following keywords separated by commas: '" - llm = OpenAI(prompt=prompt, delay_in_seconds=5) + llm = OpenAI(client, prompt=prompt, delay_in_seconds=5) ``` If you want to use OpenAI's ChatGPT model: ```python - llm = OpenAI(model="gpt-3.5-turbo", delay_in_seconds=10, chat=True) + llm = OpenAI(client, model="gpt-3.5-turbo", delay_in_seconds=10, chat=True) ``` """ def __init__(self, + client, model: str = "gpt-3.5-turbo-instruct", prompt: str = None, generator_kwargs: Mapping[str, Any] = {}, @@ -118,6 +120,7 @@ def __init__(self, chat: bool = False, verbose: bool = False ): + self.client = client self.model = model if prompt is None: @@ -172,39 +175,37 @@ def extract_keywords(self, documents: List[str], candidate_keywords: List[List[s ] kwargs = {"model": self.model, "messages": messages, **self.generator_kwargs} if self.exponential_backoff: - response = chat_completions_with_backoff(**kwargs) + response = chat_completions_with_backoff(self.client, **kwargs) else: - response = openai.ChatCompletion.create(**kwargs) - keywords = 
response["choices"][0]["message"]["content"].strip() + response = self.client.chat.completions.create(**kwargs) + keywords = response.choices[0].message.content.strip() # Use a non-chat model else: if self.exponential_backoff: - response = completions_with_backoff(model=self.model, prompt=prompt, **self.generator_kwargs) + response = completions_with_backoff(self.client, model=self.model, prompt=prompt, **self.generator_kwargs) else: - response = openai.Completion.create(model=self.model, prompt=prompt, **self.generator_kwargs) - keywords = response["choices"][0]["text"].strip() + response = self.client.completions.create(model=self.model, prompt=prompt, **self.generator_kwargs) + keywords = response.choices[0].text.strip() keywords = [keyword.strip() for keyword in keywords.split(",")] all_keywords.append(keywords) return all_keywords -def completions_with_backoff(**kwargs): +def completions_with_backoff(client, **kwargs): return retry_with_exponential_backoff( - openai.Completion.create, + client.completions.create, errors=( - openai.error.RateLimitError, - openai.error.ServiceUnavailableError, + openai.RateLimitError, ), )(**kwargs) -def chat_completions_with_backoff(**kwargs): +def chat_completions_with_backoff(client, **kwargs): return retry_with_exponential_backoff( - openai.ChatCompletion.create, + client.chat.completions.create, errors=( - openai.error.RateLimitError, - openai.error.ServiceUnavailableError, + openai.RateLimitError, ), )(**kwargs)