-
Notifications
You must be signed in to change notification settings - Fork 237
/
ore.py
289 lines (240 loc) · 17.5 KB
/
ore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# MIT License
# Copyright (c) [2024] [Hamish Davison]
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "simplemode.py]"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import openai
import time
import csv
import requests
from tqdm import tqdm
import concurrent.futures
import json
# Load configuration from a JSON file
with open('config.json') as config_file:
config = json.load(config_file)
# Set your OpenAI API key from the config file
OPENAI_API_TOKEN = config["OPENAI_API_TOKEN"]
print("Setting OpenAI API Key...")
os.environ["OPENAI_API_KEY"] = OPENAI_API_TOKEN
# Update your Freeimage.host API Key here from the config file
FREEIMAGE_HOST_API_KEY = config["FREEIMAGE_HOST_API_KEY"]
# Initialize the OpenAI client
print("Initializing OpenAI client...")
client = openai.OpenAI()
# Global list to store image URLs
image_urls = []
def upload_to_freeimage_host(image_path, Keyword):
"""
Uploads an image to Freeimage.host with {Keyword} in the filename.
Also stores the image URL in a global list.
"""
print(f"Uploading {image_path} to Freeimage.host...")
with open(image_path, 'rb') as image_file:
files = {'source': image_file}
data = {
'key': FREEIMAGE_HOST_API_KEY,
'action': 'upload',
'format': 'json',
'name': f'{Keyword}_image.png' # Add {Keyword} in the filename
}
response = requests.post('https://freeimage.host/api/1/upload', files=files, data=data)
if response.status_code == 200:
url = response.json().get('image', {}).get('url', '')
if url:
print(f"Uploaded successfully: {url}")
image_urls.append({'idea': Keyword, 'url': url}) # Store both idea and URL
return url
else:
print("Upload successful but no URL returned, something went wrong.")
else:
print(f"Failed to upload to Freeimage.host: {response.status_code}, {response.text}")
return None
def upload_file(file_path, purpose):
print(f"Uploading file: {file_path} for purpose: {purpose}")
with open(file_path, "rb") as file:
response = client.files.create(file=file, purpose=purpose)
print(f"File uploaded successfully, ID: {response.id}")
return response.id
def clear_image_urls():
"""
Clears the global list of image URLs.
"""
global image_urls
image_urls.clear()
print("Cleared global image URLs.")
print("Commencing file uploads...")
# Upload your files using paths from the config file
internal_links_file_id = upload_file(config["path_to_example_file"], 'assistants')
content_plan_file_id = upload_file(config["path_to_plan_csv"], 'assistants')
brand_plan_file_id = upload_file(config["path_to_example_file"], 'assistants')
brand_logo_file_id = upload_file(config["path_to_image_file"], 'assistants')
# Create an Assistant
print("Creating OpenAI Assistant...")
assistant = client.beta.assistants.create(
name="Content Creation Assistant",
model="gpt-4-turbo-preview",
instructions=f"YOU MUST NOT INVENT LINKS. YOU ONLY USE IMAGE LINKS FROM products.txt and brandimagesandlinks.txt You are writing for {config['business_name']}. Choose images and internal links from {config['path_to_image_file']} embed them with markdown in the final article. You must never EVER invent internal links or image links as this can destroy my SEO. YOU MUST INCLUDE INTERNAL LINKS FROM {config['path_to_image_file']} - read this first and make sure to include real internal links in the final article in the blog post When told to use retrieval use retrieval, when told to use code_interpreter use code interpreter. The final content should include internal links and embedded images from {config['path_to_image_file']} and should include formatting. Your basic steps are: 1. read {config['path_to_image_file']}, get the image, create some visualizations of data, store these for the final article. 2. Find relevant brand images and internal links from {config['path_to_image_file']}, create an outline, then write an article with all of this data you've either created or found Copy the tone from {config['path_to_example_file']} EXACTLY. Read {config['path_to_example_file']}. Use this as a guide to shape the final {config['page_type']}. The {config['page_type']} should follow the length and tone of {config['path_to_example_file']}. You are SEOGPT, aiming to create in-depth and interesting blog posts for {config['business_name']}, an {config['business_type']} in {config['country']}, you should write at a grade 7 level {config['language']} Every blog post should include at least 3 images and links to their other pages from {config['business_name']}.. Ensure the brand image links are accurate. Choose only relevant brand pages. Do not invent image links. Pick 5 strictly relevant brand images and internal links for the articles. First, read the attached files, then create a detailed outline for a {config['page_type']}, including up to 5 highly relevant internal collection links and brand image links.",
tools=[{"type": "retrieval"}, {"type": "code_interpreter"}],
file_ids=[internal_links_file_id, content_plan_file_id, brand_plan_file_id, brand_logo_file_id,]
)
print("Assistant created successfully.")
def wait_for_run_completion(thread_id, run_id, timeout=300):
print(f"Waiting for run completion, thread ID: {thread_id}, run ID: {run_id}")
start_time = time.time()
while time.time() - start_time < timeout:
run_status = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
if run_status.status == 'completed':
print("Run completed successfully.")
return run_status
time.sleep(10)
raise TimeoutError("Run did not complete within the specified timeout.")
def perplexity_research(Keyword, max_retries=3, delay=5):
"""
Conducts perplexity research with retries on failure.
Args:
Keyword (str): The blog post idea to research.
max_retries (int): Maximum number of retries.
delay (int): Delay in seconds before retrying.
Returns:
dict or None: The response from the API or None if failed.
"""
print(f"Starting perplexity research for: {Keyword}")
url = "https://api.perplexity.ai/chat/completions"
payload = {
"model": "pplx-70b-online",
"messages": [
{
"role": "system",
"content": "Be precise and concise."
},
{
"role": "user",
"content": f"Find highly specific generalised data about {Keyword} in 2024. Do not give me any information about specific brands."
}
]
}
headers = {
"accept": "application/json",
"content-type": "application/json",
"authorization": f"Bearer {config['PERPLEXITY_API_KEY']}"
}
for attempt in range(max_retries):
response = requests.post(url, json=payload, headers=headers)
if response.status_code == 200:
print("Perplexity research completed successfully.")
try:
return response.json()
except ValueError:
print("JSON decoding failed")
return None
else:
print(f"Perplexity research failed with status code: {response.status_code}. Attempt {attempt + 1} of {max_retries}.")
time.sleep(delay)
print("Perplexity research failed after maximum retries.")
return None
def get_internal_links(thread_id, Keyword):
print(f"Fetching internal links relevant to: {Keyword}")
get_request = f"Use Retrieval. Read products.txt and brandimagesandlinks.txt, Choose 5 relevant pages and store their links for internal links that are relevant to {Keyword}. Don't have more than 5. Then from products.txt please extract the EXACT product image URLs and product links"
client.beta.threads.messages.create(thread_id=thread_id, role="user", content=get_request)
get_request_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
wait_for_run_completion(thread_id, get_request_run.id)
messages = client.beta.threads.messages.list(thread_id=thread_id)
print("Internal links fetched successfully.")
return next((m.content for m in messages.data if m.role == "assistant"), None)
def create_data_vis(thread_id, perplexity_research, Keyword):
print("Creating data visualizations...")
for _ in range(3): # Loop to generate 3 visualizations
get_request = f"Use Code Interpreter - invent a VERY simple Visualization of some interesting data from {perplexity_research}."
client.beta.threads.messages.create(thread_id=thread_id, role="user", content=get_request)
get_request_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
wait_for_run_completion(thread_id, get_request_run.id)
messages = client.beta.threads.messages.list(thread_id=thread_id)
if hasattr(messages.data[0].content[0], 'image_file'):
file_id = messages.data[0].content[0].image_file.file_id
image_data = client.files.content(file_id)
image_data_bytes = image_data.read()
image_path = f"./visualization_image_{_}.png"
with open(image_path, "wb") as file:
file.write(image_data_bytes)
print(f"Visualization {_+1} created, attempting upload...")
upload_to_freeimage_host(image_path, Keyword)
else:
print(f"No image file found in response for visualization {_+1}. Attempt aborted.")
def process_blog_post(thread_id, Keyword):
print(f"Processing blog post for: {Keyword}")
research_results = perplexity_research(Keyword)
research_info = str(research_results)
create_data_vis(thread_id, research_info, Keyword)
internal_links = get_internal_links(thread_id, Keyword)
# Only include relevant image URLs for the current blog post idea
relevant_image_urls = [img['url'] for img in image_urls if img['idea'] == Keyword]
images_for_request = " ".join(relevant_image_urls)
outline_request = f"Use retrieval. Look at brandimagesandlinks.txt and products.txt. Look for keyword matches between the {Keyword} and the products.txt file. Look for highly relevant images. Only include highly relevant images in the outline. Create a SHORT outline for a {config['page_type']} based on {perplexity_research}. Also include data visualizations from {create_data_vis} Do not invent image links. use the product images and internal links from {internal_links} and the include the custom graphs from {images_for_request} and use them to create an outline for a {config['page_type']} about {Keyword}' In the outline do not use sources or footnotes, but just add a relevant product images in a relevant section, and a relevant internal link in a relevant section. There is no need for a lot of sources, each article needs a minimum of 5 brand images and internal links."
client.beta.threads.messages.create(thread_id=thread_id, role="user", content=outline_request)
outline_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
wait_for_run_completion(thread_id, outline_run.id)
messages = client.beta.threads.messages.list(thread_id=thread_id)
outline = next((m.content for m in messages.data if m.role == "assistant"), None)
article = None
if outline:
article_request = f"Embed at least 4 highly relevant images into the final article. When embedding an image ensure it's exactly as it is written inside products.txt Write a short, snappy article in {config['language']} Write at a grade 7 level. ONLY USE INTERNAL LINKS FROM {internal_links} You never invent internal links or image links. Include images from {create_data_vis} also include real internal links from brandimagesandlinks.txt Based on \n{outline} and Make sure to use a mix of the {images_for_request} and brand images. Include highly specific information from {research_results}. Do not use overly creative or crazy language. Use a {config['tone']} tone of voice. Write as if writing for The Guardian newspaper.. Just give information. Don't write like a magazine. Use simple language. Do not invent image links. You are writing from a first person plural perspective for the business, refer to it in the first person plural. Add a key takeaway table at the top of the article, summarzing the main points. Never invent links or brand images Choose 5 internal links and 5 brand images that are relevant to a pillar page and then create a pillar page with good formatting based on the following outline:\n{outline}, Title should be around 60 characters. Include the brand images and internal links to other pillar pages naturally and with relevance inside the {config['page_type']}. Use markdown formatting and ensure to use tables and lists to add to formatting. Use 3 relevant brand images and pillar pages with internal links maximum. Never invent any internal links. Include all of the internal links and brand images from {outline} Use different formatting to enrich the pillar page. Always include a table at the very top wtih key takeaways, also include lists to make more engaging content. Use Based on the outline: {outline}, create an article. Use {images_for_request} with the image name inside [] and with the link from {images_for_request} in order to enrich the content, create a pillar page about this topic. Use the brand images and internal links gathered from {internal_links}. Use {research_info} to make the more relevant. The end product shuold look like {config['path_to_example_file']} as an example"
client.beta.threads.messages.create(thread_id=thread_id, role="user", content=article_request)
article_run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=assistant.id)
wait_for_run_completion(thread_id, article_run.id)
messages = client.beta.threads.messages.list(thread_id=thread_id)
article = next((m.content for m in messages.data if m.role == "assistant"), None)
if article:
print("Article created successfully.")
clear_image_urls() # Call the new function here to clear the image URLs
else:
print("Failed to create an article.")
print(article)
return outline, article
def process_keywords_concurrent():
input_file = 'keywords.csv'
output_file = 'processed_keywords.csv'
# Corrected fieldnames array to include a missing comma and ensure it matches expected output
fieldnames = ['Keyword', 'Outline', 'Article', 'Processed']
# Read all rows to be processed
with open(input_file, newline='', encoding='utf-8') as csvfile:
reader = csv.DictReader(csvfile)
rows_to_process = [row for row in reader]
# Process each blog post idea concurrently
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
future_to_row = {executor.submit(process_blog_post, client.beta.threads.create().id, row['Keyword']): row for row in rows_to_process}
# Initialize tqdm progress bar
progress = tqdm(concurrent.futures.as_completed(future_to_row), total=len(rows_to_process), desc="Processing Keywords")
# Collect results first to avoid writing to the file inside the loop
results = []
for future in progress:
row = future_to_row[future]
try:
outline, article = future.result() # Assuming this returns an outline and an article
# Create a new dictionary for CSV output to ensure it matches the specified fieldnames
processed_row = {
'Keyword': row['Keyword'],
'Outline': outline,
'Article': article,
'Processed': 'Yes'
}
results.append(processed_row)
except Exception as exc:
print(f'Keyword {row["Keyword"]} generated an exception: {exc}')
# Handle failed processing by marking as 'Failed' but still match the fieldnames
processed_row = {
'Keyword': row['Keyword'],
'Outline': '', # or you might use 'N/A' or similar placeholder
'Article': '', # same as above
'Processed': 'Failed'
}
results.append(processed_row)
# Write all results to the output file after processing
with open(output_file, 'w', newline='', encoding='utf-8') as f_output: # Use 'w' to overwrite or create anew
writer = csv.DictWriter(f_output, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(results)
# Example usage
if __name__ == "__main__":
process_keywords_concurrent()