From 445e2462311d08f951d89decfa5577827bcd358d Mon Sep 17 00:00:00 2001 From: Maarten Grootendorst Date: Wed, 13 Dec 2023 07:24:44 +0100 Subject: [PATCH] Support for OpenAI >= 1 (#189) * Now only support for openai >= 1.0 * Use the `client` parameter to access the API (see docstrings) --- README.md | 8 ++++---- docs/changelog.md | 20 ++++++++++++++++++++ docs/guides/keyllm.md | 22 ++++++++++------------ docs/guides/llms.md | 8 ++++---- keybert/_llm.py | 4 ++-- keybert/llm/_openai.py | 37 +++++++++++++++++++------------------ 6 files changed, 59 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index adf9fa92..3e4e6a03 100644 --- a/README.md +++ b/README.md @@ -241,8 +241,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -265,8 +265,8 @@ model = SentenceTransformer('all-MiniLM-L6-v2') embeddings = model.encode(MY_DOCUMENTS, convert_to_tensor=True) # Create your LLM -openai.api_key = "sk-..." 
-llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) diff --git a/docs/changelog.md b/docs/changelog.md index 7b673507..6dd594ed 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -3,6 +3,26 @@ hide: - navigation --- +## **Version 0.8.3** +*Release date: 29 November, 2023* + +* Fix support for openai>=1 + +You can now use it as follows: + +```python +import openai +from keybert.llm import OpenAI +from keybert import KeyLLM + +# Create your LLM +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) + +# Load it in KeyLLM +kw_model = KeyLLM(llm) +``` + ## **Version 0.8.2** *Release date: 29 September, 2023* diff --git a/docs/guides/keyllm.md b/docs/guides/keyllm.md index 1f7496f7..f1913fc4 100644 --- a/docs/guides/keyllm.md +++ b/docs/guides/keyllm.md @@ -47,8 +47,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -85,8 +85,6 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." - prompt = """ I have the following document: [DOCUMENT] @@ -96,7 +94,8 @@ Make sure to only extract keywords that appear in the text. Use the following format separated by commas: """ -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -129,8 +128,6 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." 
- prompt = """ I have the following document: [DOCUMENT] @@ -143,7 +140,8 @@ Based on the information above, improve the candidate keywords to best describe Use the following format separated by commas: """ -llm = OpenAI(model="gpt-3.5-turbo", prompt=prompt, chat=True) +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client,model="gpt-3.5-turbo", prompt=prompt, chat=True) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -199,8 +197,8 @@ model = SentenceTransformer('all-MiniLM-L6-v2') embeddings = model.encode(documents, convert_to_tensor=True) # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -245,8 +243,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM, KeyBERT # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyBERT(llm=llm) diff --git a/docs/guides/llms.md b/docs/guides/llms.md index b57cf7d4..10e65cab 100644 --- a/docs/guides/llms.md +++ b/docs/guides/llms.md @@ -19,8 +19,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your OpenAI LLM -openai.api_key = "sk-..." -llm = OpenAI() +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -37,8 +37,8 @@ from keybert.llm import OpenAI from keybert import KeyLLM # Create your LLM -openai.api_key = "sk-..." -llm = OpenAI(model="gpt-3.5-turbo", chat=True) +client = openai.OpenAI(api_key=MY_API_KEY) +llm = OpenAI(client, model="gpt-3.5-turbo", chat=True) # Load it in KeyLLM kw_model = KeyLLM(llm) diff --git a/keybert/_llm.py b/keybert/_llm.py index db04c609..f3b04fab 100644 --- a/keybert/_llm.py +++ b/keybert/_llm.py @@ -59,8 +59,8 @@ def extract_keywords( from keybert import KeyLLM # Create your LLM - openai.api_key = "sk-..." 
- llm = OpenAI() + client = openai.OpenAI(api_key=MY_API_KEY) + llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) diff --git a/keybert/llm/_openai.py b/keybert/llm/_openai.py index 178abee6..5c8c078c 100644 --- a/keybert/llm/_openai.py +++ b/keybert/llm/_openai.py @@ -50,6 +50,7 @@ class OpenAI(BaseLLM): keywords are comma-separated. Arguments: + client: A `openai.OpenAI` client model: Model to use within OpenAI, defaults to `"text-ada-001"`. NOTE: If a `gpt-3.5-turbo` model is used, make sure to set `chat` to True. @@ -85,8 +86,8 @@ class OpenAI(BaseLLM): from keybert import KeyLLM # Create your LLM - openai.api_key = "sk-..." - llm = OpenAI() + client = openai.OpenAI(api_key=MY_API_KEY) + llm = OpenAI(client) # Load it in KeyLLM kw_model = KeyLLM(llm) @@ -100,16 +101,17 @@ class OpenAI(BaseLLM): ```python prompt = "I have the following document: [DOCUMENT] \nThis document contains the following keywords separated by commas: '" - llm = OpenAI(prompt=prompt, delay_in_seconds=5) + llm = OpenAI(client, prompt=prompt, delay_in_seconds=5) ``` If you want to use OpenAI's ChatGPT model: ```python - llm = OpenAI(model="gpt-3.5-turbo", delay_in_seconds=10, chat=True) + llm = OpenAI(client, model="gpt-3.5-turbo", delay_in_seconds=10, chat=True) ``` """ def __init__(self, + client, model: str = "gpt-3.5-turbo-instruct", prompt: str = None, generator_kwargs: Mapping[str, Any] = {}, @@ -118,6 +120,7 @@ def __init__(self, chat: bool = False, verbose: bool = False ): + self.client = client self.model = model if prompt is None: @@ -172,39 +175,37 @@ def extract_keywords(self, documents: List[str], candidate_keywords: List[List[s ] kwargs = {"model": self.model, "messages": messages, **self.generator_kwargs} if self.exponential_backoff: - response = chat_completions_with_backoff(**kwargs) + response = chat_completions_with_backoff(self.client, **kwargs) else: - response = openai.ChatCompletion.create(**kwargs) - keywords = 
response["choices"][0]["message"]["content"].strip() + response = self.client.chat.completions.create(**kwargs) + keywords = response.choices[0].message.content.strip() # Use a non-chat model else: if self.exponential_backoff: - response = completions_with_backoff(model=self.model, prompt=prompt, **self.generator_kwargs) + response = completions_with_backoff(self.client, model=self.model, prompt=prompt, **self.generator_kwargs) else: - response = openai.Completion.create(model=self.model, prompt=prompt, **self.generator_kwargs) - keywords = response["choices"][0]["text"].strip() + response = self.client.completions.create(model=self.model, prompt=prompt, **self.generator_kwargs) + keywords = response.choices[0].text.strip() keywords = [keyword.strip() for keyword in keywords.split(",")] all_keywords.append(keywords) return all_keywords -def completions_with_backoff(**kwargs): +def completions_with_backoff(client, **kwargs): return retry_with_exponential_backoff( - openai.Completion.create, + client.completions.create, errors=( - openai.error.RateLimitError, - openai.error.ServiceUnavailableError, + openai.RateLimitError, ), )(**kwargs) -def chat_completions_with_backoff(**kwargs): +def chat_completions_with_backoff(client, **kwargs): return retry_with_exponential_backoff( - openai.ChatCompletion.create, + client.chat.completions.create, errors=( - openai.error.RateLimitError, - openai.error.ServiceUnavailableError, + openai.RateLimitError, ), )(**kwargs)