-
Notifications
You must be signed in to change notification settings - Fork 0
/
background.js
285 lines (258 loc) · 11.8 KB
/
background.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
/**
 * Routes one-shot messages received through chrome.runtime.onMessage.
 *
 * Handled actions:
 *   - 'startCrawl'       -> startCrawl(request.url)
 *   - 'checkCrawlStatus' -> checkCrawlStatus(request.crawlId)
 *
 * The listener returns true only for the actions it handles, because that is
 * what keeps the response channel open until the asynchronous sendResponse
 * call. Any other message returns false so the channel closes right away
 * instead of waiting for a reply that will never be sent.
 */
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
  if (request.action === 'startCrawl') {
    startCrawl(request.url).then(
      (response) => sendResponse(response),
      // Answer even on an unexpected rejection so the sender is not left waiting.
      (error) => sendResponse({ success: false, error: error.message })
    );
    return true;
  }
  if (request.action === 'checkCrawlStatus') {
    checkCrawlStatus(request.crawlId).then(
      (response) => sendResponse(response),
      (error) => sendResponse({ status: 'error', error: error.message })
    );
    return true;
  }
  return false;
});
// Long-lived connections: only ports named 'openaiStream' are served. Every
// 'askQuestionStream' message arriving on such a port is handed to
// askQuestionStream together with the port it came in on.
chrome.runtime.onConnect.addListener((port) => {
  if (port.name !== 'openaiStream') {
    return;
  }
  port.onMessage.addListener((request) => {
    const isQuestion = request.action === 'askQuestionStream';
    if (isQuestion) {
      askQuestionStream(request.question, port);
    }
  });
});
/**
 * Returns a stored setting, or `fallback` when nothing usable was stored.
 *
 * Only undefined, null, '' and NaN count as "not set". Unlike
 * `value || fallback`, an explicitly stored 0 is kept.
 */
function settingOrDefault(value, fallback) {
  const isUnset = value === undefined || value === null || value === '' ||
    (typeof value === 'number' && Number.isNaN(value));
  return isUnset ? fallback : value;
}

/**
 * Starts a crawl of `url` through the Firecrawl v1 crawl endpoint.
 *
 * Reads the API key and crawl settings from chrome.storage.local, POSTs the
 * crawl request, and on success stores `crawlId` and `currentDomain` and
 * resets the conversation history kept for that domain.
 *
 * @param {string} url - URL of the site to crawl; it must be parseable by `new URL`.
 * @returns {Promise<Object>} Resolves with `{ success: true, crawlId }` on
 *   success, or `{ success: false, error }` when the key is missing, the URL is
 *   invalid, the API reports failure, or the request throws.
 */
function startCrawl(url) {
  return new Promise((resolve) => {
    chrome.storage.local.get(['firecrawlKey', 'maxDepth', 'limit', 'timeout', 'allowBackwardLinks', 'waitFor'], function(result) {
      const firecrawlKey = result.firecrawlKey;
      if (!firecrawlKey) {
        resolve({ success: false, error: 'Firecrawl API key not set. Please set it in the options page.' });
        return;
      }

      // Derive the domain BEFORE calling the API: it is needed to store the
      // crawl, and failing here avoids starting a crawl whose id is then lost.
      let domain;
      try {
        domain = new URL(url).hostname;
      } catch (error) {
        resolve({ success: false, error: error.message });
        return;
      }

      // A stored 0 is preserved for maxDepth and waitFor instead of being
      // replaced by the default.
      const maxDepth = settingOrDefault(result.maxDepth, 3);
      const waitFor = settingOrDefault(result.waitFor, 2000); // milliseconds
      const limit = result.limit || 50;
      const timeout = result.timeout || 20000; // milliseconds
      const allowBackwardLinks = typeof result.allowBackwardLinks !== 'undefined' ? result.allowBackwardLinks : true;

      const apiUrl = 'https://api.firecrawl.dev/v1/crawl';
      const data = {
        url: url,
        scrapeOptions: {
          formats: ['markdown'],
          waitFor: waitFor,
          timeout: timeout
        },
        limit: limit,
        allowBackwardLinks: allowBackwardLinks,
        maxDepth: maxDepth
      };

      fetch(apiUrl, {
        method: 'POST',
        headers: {
          'Authorization': 'Bearer ' + firecrawlKey,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify(data)
      })
        .then((response) => response.json())
        .then((data) => {
          if (!data.success) {
            resolve({ success: false, error: 'Failed to start crawl. Please check your API key and URL.' });
            return;
          }
          // Store crawl ID and domain, then start the domain with an empty history.
          chrome.storage.local.set({ crawlId: data.id, currentDomain: domain }, function() {
            chrome.storage.local.get(['conversationHistories'], function(res) {
              const conversationHistories = res.conversationHistories || {};
              conversationHistories[domain] = [];
              chrome.storage.local.set({ conversationHistories: conversationHistories }, function() {
                resolve({ success: true, crawlId: data.id });
              });
            });
          });
        })
        .catch((error) => {
          resolve({ success: false, error: error.message });
        });
    });
  });
}
/**
 * Fetches the status of a crawl from the Firecrawl v1 crawl endpoint.
 *
 * When the crawl has completed, the returned pages (`data.data`) are written
 * to chrome.storage.local under `crawlData` before the promise resolves.
 *
 * @param {string} crawlId - Identifier of the crawl, as returned by startCrawl.
 * @returns {Promise<Object>} Resolves with
 *   `{ status: 'completed' | 'scraping', total, completed }`, or with
 *   `{ status: 'error', error }` when the key or id is missing, the crawl data
 *   cannot be stored, the API answers with anything else, or the request throws.
 */
function checkCrawlStatus(crawlId) {
  return new Promise((resolve) => {
    chrome.storage.local.get(['firecrawlKey'], function(result) {
      const firecrawlKey = result.firecrawlKey;
      if (!firecrawlKey) {
        resolve({status: 'error', error: 'Firecrawl API key not set.'});
        return;
      }
      // Without an id the request would go to ".../crawl/undefined".
      if (!crawlId) {
        resolve({status: 'error', error: 'No crawl ID provided.'});
        return;
      }

      const apiUrl = `https://api.firecrawl.dev/v1/crawl/${encodeURIComponent(crawlId)}`;
      fetch(apiUrl, {
        method: 'GET',
        headers: {
          'Authorization': 'Bearer ' + firecrawlKey
        }
      })
        .then((response) => response.json())
        .then((data) => {
          if (data.status === 'completed') {
            chrome.storage.local.set({crawlData: data.data}, function() {
              // A failed write (e.g. storage quota exceeded) is reported via
              // chrome.runtime.lastError inside this callback. Do not claim
              // completion if the crawl data was not actually saved.
              const storageError = chrome.runtime.lastError;
              if (storageError) {
                resolve({status: 'error', error: 'Failed to store crawl data: ' + storageError.message});
                return;
              }
              resolve({status: 'completed', total: data.total, completed: data.completed});
            });
          } else if (data.status === 'scraping') {
            resolve({status: 'scraping', total: data.total, completed: data.completed});
          } else {
            // Prefer the API's own error text when it supplies one.
            const apiError = typeof data.error === 'string' && data.error !== '' ? data.error : 'Unknown crawl status.';
            resolve({status: 'error', error: apiError});
          }
        })
        .catch((error) => {
          resolve({status: 'error', error: error.message});
        });
    });
  });
}
/**
 * Joins the crawled pages into one text block for the prompt.
 *
 * Each page that has markdown is rendered as title, URL and content. The
 * character budget `maxContentLength` is divided equally by the number of
 * pages; a page longer than its share is cut and suffixed with '...', and the
 * whole text is cut the same way once it exceeds the overall budget.
 *
 * @param {Array<Object>} crawlData - Pages; `markdown`, `metadata.title` and
 *   `metadata.sourceURL` are read from each entry.
 * @param {number} maxContentLength - Overall character budget.
 * @returns {string} The concatenated site content.
 */
function buildSiteContent(crawlData, maxContentLength) {
  const maxPageLength = Math.floor(maxContentLength / crawlData.length);
  let siteContent = '';
  for (const page of crawlData) {
    if (!page || !page.markdown) {
      continue;
    }
    // A page may come without metadata; fall back to placeholders instead of throwing.
    const metadata = page.metadata || {};
    const title = metadata.title || 'No Title';
    const url = metadata.sourceURL || 'No URL';
    let pageContent = `Page Title: ${title}\nURL: ${url}\nContent:\n${page.markdown}\n\n`;
    if (pageContent.length > maxPageLength) {
      pageContent = pageContent.substring(0, maxPageLength) + '...';
    }
    siteContent += pageContent;
    siteContent += '[END OF PAGE]\n';
    if (siteContent.length > maxContentLength) {
      siteContent = siteContent.substring(0, maxContentLength) + '...';
      break;
    }
  }
  return siteContent;
}

/**
 * Interprets one line of the streamed completion response.
 *
 * @param {string} line - A single line, optionally prefixed with 'data:'.
 * @returns {{done: boolean, content: string}} `done` is true for the '[DONE]'
 *   marker; `content` is the text delta carried by the line, or '' when the
 *   line is blank, carries no text, or cannot be parsed.
 */
function parseStreamLine(line) {
  const trimmed = line.trim();
  if (trimmed === '') {
    return { done: false, content: '' };
  }
  const message = trimmed.replace(/^data:\s*/, '');
  if (message === '[DONE]') {
    return { done: true, content: '' };
  }
  try {
    const parsed = JSON.parse(message);
    // Chunks without choices or without a text delta are valid; they just carry no text.
    const choice = parsed && parsed.choices && parsed.choices[0];
    const content = choice && choice.delta && choice.delta.content;
    return { done: false, content: typeof content === 'string' ? content : '' };
  } catch (error) {
    console.error('Error parsing stream message', error);
    return { done: false, content: '' };
  }
}

/**
 * Answers `question` about the crawled site and streams the answer to `port`.
 *
 * Reads the OpenAI key, crawl data, current domain, conversation history and
 * model settings from chrome.storage.local, sends a streaming chat completion
 * request, and forwards the result over the port as:
 *   - `{ answer: <text delta> }` for every piece of text received,
 *   - `{ done: true }` once the answer is complete,
 *   - `{ error: <message> }` on any failure.
 * The port is disconnected after `done` or `error`. On completion the question
 * and the full answer are appended to the stored history of the current domain.
 *
 * @param {string} question - The user's question.
 * @param {Object} port - Port the request arrived on; its `postMessage`,
 *   `disconnect` and `onDisconnect` members are used.
 */
function askQuestionStream(question, port) {
  // Track whether the other end is still listening. Posting to a closed port
  // throws, so once it is gone the request is aborted and nothing more is sent.
  let portOpen = true;
  const abortController = new AbortController();
  port.onDisconnect.addListener(function() {
    portOpen = false;
    abortController.abort();
  });

  function send(message) {
    if (portOpen) {
      port.postMessage(message);
    }
  }

  function closePort() {
    if (portOpen) {
      portOpen = false;
      port.disconnect();
    }
  }

  function fail(message) {
    send({ error: message });
    closePort();
  }

  chrome.storage.local.get(['openaiKey', 'crawlData', 'currentDomain', 'conversationHistories', 'maxContentLength', 'model'], function(result) {
    const openaiKey = result.openaiKey;
    const crawlData = result.crawlData;
    const domain = result.currentDomain;
    const conversationHistories = result.conversationHistories || {};
    const conversationHistory = conversationHistories[domain] || [];
    const maxContentLength = result.maxContentLength || 250000; // Default max characters
    const model = result.model || 'gpt-4o-mini'; // Default model

    if (!openaiKey) {
      fail('OpenAI API key not set. Please set it in the options page.');
      return;
    }
    if (!Array.isArray(crawlData)) {
      fail('No crawl data available. Please start the crawl first.');
      return;
    }

    const siteContent = buildSiteContent(crawlData, maxContentLength);

    // Get the current date in a readable format
    const options = { year: 'numeric', month: 'long', day: 'numeric' };
    const currentDate = new Date().toLocaleDateString('en-US', options);

    const messages = [
      {
        "role": "system",
        "content": `You are a helpful assistant that answers questions about the content of a website as of ${currentDate}. Use the provided page titles and URLs to reference where information comes from. Include links in Markdown format when appropriate.`
      },
      {
        "role": "user",
        "content": "Here is the content of the website:\n\n" + siteContent
      },
      // Earlier turns for this domain, followed by the new question.
      ...conversationHistory,
      {
        "role": "user",
        "content": question
      }
    ];

    const apiUrl = 'https://api.openai.com/v1/chat/completions';
    const data = {
      model: model,
      messages: messages,
      stream: true,
      temperature: 0.1
    };

    fetch(apiUrl, {
      method: 'POST',
      headers: {
        'Authorization': 'Bearer ' + openaiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(data),
      signal: abortController.signal
    })
      .then((response) => {
        if (!response.ok) {
          // The error body may not be JSON or may lack `error.message`;
          // fall back to the HTTP status so the client always gets an answer.
          return response.json()
            .catch(() => null)
            .then((errorData) => {
              const detail = errorData && errorData.error && errorData.error.message;
              fail(detail || `OpenAI request failed (HTTP ${response.status}).`);
            });
        }

        const reader = response.body.getReader();
        const decoder = new TextDecoder('utf-8');
        let assistantMessage = '';
        let buffer = '';

        // Reports completion and persists the finished exchange.
        function finish() {
          send({ done: true });
          conversationHistory.push({ role: 'user', content: question });
          conversationHistory.push({ role: 'assistant', content: assistantMessage });
          conversationHistories[domain] = conversationHistory;
          chrome.storage.local.set({ conversationHistories: conversationHistories });
          closePort();
        }

        function readChunk() {
          reader.read().then(({ done, value }) => {
            // `stream: true` keeps a multi-byte character that is split across
            // two chunks intact; the argument-less call flushes at the end.
            buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

            const lines = buffer.split('\n');
            // While streaming, the last piece may be an incomplete line and is
            // kept for the next chunk; at end of stream everything is final.
            buffer = done ? '' : lines.pop();

            for (const line of lines) {
              const event = parseStreamLine(line);
              if (event.done) {
                finish();
                return;
              }
              if (event.content) {
                assistantMessage += event.content;
                send({ answer: event.content });
              }
            }

            if (done) {
              finish();
              return;
            }
            // Continue reading the next chunk
            readChunk();
          }).catch((error) => {
            fail(error.message);
          });
        }

        readChunk();
      })
      .catch((error) => {
        fail(error.message);
      });
  });
}