diff --git a/.eslintrc.js b/.eslintrc.js
index d30c0a1e..199206d4 100644
--- a/.eslintrc.js
+++ b/.eslintrc.js
@@ -7,4 +7,7 @@ module.exports = {
  parserOptions: {
    project: true,
  },
+  rules: {
+    "@typescript-eslint/consistent-type-imports": "off",
+  },
 };
diff --git a/.husky/_/pre-commit b/.husky/_/pre-commit
index 921078d7..38c1a01d 100755
--- a/.husky/_/pre-commit
+++ b/.husky/_/pre-commit
@@ -1,3 +1,3 @@
 #!/bin/sh
 . "$(dirname "$0")/husky.sh"
-bun run lint-staged
\ No newline at end of file
+pnpm dlx lint-staged
\ No newline at end of file
diff --git a/.npmrc b/.npmrc
new file mode 100644
index 00000000..361b1968
--- /dev/null
+++ b/.npmrc
@@ -0,0 +1,2 @@
+registry=https://registry.npmjs.org/
+link-workspace-packages=true
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 00000000..eeeb7be5
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1 @@
+pnpm-lock.yaml
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 61a3bb7c..15394cb2 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,6 +2,6 @@
  "css.validate": false,
  "editor.quickSuggestions": {
    "strings": true
-  },
-  "typescript.tsdk": "node_modules/typescript/lib"
+  },
+  "typescript.tsdk": "node_modules/typescript/lib"
 }
diff --git a/README.md b/README.md
index 4bd34791..24748945 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Interested in helping build the best second brain for everyone? Join the discord

## 👀 What is this?

-Build your own second brain with supermemory. It's a ChatGPT for your bookmarks. Import tweets or save websites and content using the [chrome extension](https://chromewebstore.google.com/detail/supermemory/afpgkkipfdpeaflnpoaffkcankadgjfc?hl=en-GB&authuser=0) (the extension on webstore is not updated, please use the one in the repo)
+Build your own second brain with supermemory. It's a ChatGPT for your bookmarks. Import tweets or save websites and content using the [chrome extension](https://chromewebstore.google.com/detail/supermemory/afpgkkipfdpeaflnpoaffkcankadgjfc?hl=en-GB&authuser=0)

Well, here's the thing - me and @yxshv save a _lot_ of content on the internet.
diff --git a/SETUP-GUIDE.md b/SETUP-GUIDE.md
index 72052931..46a2e1da 100644
--- a/SETUP-GUIDE.md
+++ b/SETUP-GUIDE.md
@@ -4,17 +4,16 @@ This guide will help you set up your own instance of Supermemory. This is necces

## Prerequisites

-- [bun](https://bun.sh/)
+- [pnpm](https://pnpm.io/installation): pnpm is used as a package manager. You can enable pnpm by running `corepack enable pnpm` in your terminal.
- [turbo](https://turbo.build/repo/docs/installing)
- [wrangler](https://developers.cloudflare.com/workers/cli-wrangler/install-update)
-- [yarn](https://yarnpkg.com/getting-started/install): yarn is required to run scripts using turborepo. bun is not supported by turborepo yet vercel/turbo#4762
- [Cloudflare Workers](https://developers.cloudflare.com/workers/platform/pricing/): You also need to have a paid Workers plan to use the vectorize feature, which is needed to run the AI backend. It is currently $5/mo + usage costs.
- [Cloudflare R2](https://developers.cloudflare.com/r2/): You need to enable R2 in the Cloudflare Dashboard for use in the web app.

## Steps

1. Clone the repo
-2. Run `bun install` in the root directory
+2. Run `pnpm install` in the root directory

### web

@@ -37,11 +36,11 @@ BACKEND_BASE_URL="http://localhost:8686"

3. KV Namespaces

```bash
-bunx wrangler kv namespace create canvas-snaps
+pnpx wrangler kv namespace create canvas-snaps
```

```bash
-bunx wrangler kv namespace create recommendations
+pnpx wrangler kv namespace create recommendations
```

Do not change the binding value in the `wrangler.toml` but update the id for the namespaces with the values you get from the above commands.

@@ -49,7 +48,7 @@ Do not change the binding value in the `wrangler.toml` but update the id for the

4. R2 Storage

```bash
-bunx wrangler r2 bucket create supermemory-r2
+pnpx wrangler r2 bucket create supermemory-r2
```

Update bucket_name in the `wrangler.toml` file in `apps/web` to `supermemory-r2`

@@ -57,13 +56,13 @@ Update bucket_name in the `wrangler.toml` file in `apps/web` to `supermemory-r2`

5. D1 Database

```bash
-bunx wrangler d1 create supermemory-db-prod
+pnpx wrangler d1 create supermemory-db-prod
```

Update the database_name and database_id in `[[env.production.d1_databases]]` with the values you get from the above command.

```bash
-bunx wrangler d1 create supermemory-db-preview
+pnpx wrangler d1 create supermemory-db-preview
```

Update the database_name and database_id in `[[d1_databases]]` and `[[env.preview.d1_databases]]` with the values you get from the above command.

@@ -81,12 +80,12 @@ database_id = "YOUR_DB_ID"

Simply run this command in `apps/web`

```bash
-bunx wrangler d1 migrations apply supermemory-db-preview
+pnpx wrangler d1 migrations apply supermemory-db-preview
```

If it runs, you can set up the cloud database as well by adding the `--remote` flag.

-if you just want to contribute to frontend then just run `bun run dev` in the root of the project and done! (you won't be able to try ai stuff), otherwise continue...
+If you just want to contribute to the frontend, just run `pnpm run dev` in the root of the project and you're done (you won't be able to try the AI features); otherwise, continue...

### cf-ai-backend

@@ -104,7 +103,7 @@ OPENAI_API_KEY="sk-"

> Note: You need to use the workers paid plan to use vectorize for now.

```bash
-bunx wrangler vectorize create --dimensions=1536 supermemory --metric=cosine
+pnpx wrangler vectorize create --dimensions=1536 supermemory --metric=cosine
```

Update the index_name for `[[vectorize]]` in the `wrangler.toml` file in `apps/cf-ai-backend` with `supermemory` or the name you used in the above command.

@@ -112,27 +111,27 @@ Update the index_name for `[[vectorize]]` in `wrangler.toml` file in `apps/cf-ai

3. Create KV namespaces for the `cf-ai-backend` module

```bash
-bunx wrangler kv namespace create prod
+pnpx wrangler kv namespace create prod
```

Update the id in `[[kv_namespaces]]` in the `wrangler.toml` file in `apps/cf-ai-backend` with the value you get from the above command.

```bash
-bunx wrangler kv namespace create preview
+pnpx wrangler kv namespace create preview
```

Update the preview_id in `[[kv_namespaces]]` in the `wrangler.toml` file in `apps/cf-ai-backend` with the value you get from the above command.

## Local Development

-- Run `bun dev` in the root directory and Voila! You have your own supermemory instance running!
+- Run `pnpm dev` in the root directory and Voila! You have your own supermemory instance running!

> [!NOTE]
-> It sometimes takes multiple tries to successfully run the `bun dev` command. If you encounter any issues, try running the command again.
+> It sometimes takes multiple tries to successfully run the `pnpm dev` command. If you encounter any issues, try running the command again.
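> [!NOTE]
> The updated `cf-ai-backend` worker also expects three new bindings that the steps above do not cover: `CF_ACCOUNT_ID`, `KV_NAMESPACE_ID`, and `CF_KV_AUTH_TOKEN` (see `Env` in `apps/cf-ai-backend/src/types.ts`; they are used by `src/utils/kvBulkInsert.ts` to write chunk-id → content-id mappings through the Cloudflare KV bulk API). Below is a minimal sketch of how you might supply them for local development — the variable names come from this change, but the placeholder values and the exact API-token scope (a token with Workers KV Storage edit permission) are assumptions.

```bash
# apps/cf-ai-backend/.dev.vars (illustrative values only)
OPENAI_API_KEY="sk-..."
# Your Cloudflare account id, visible in the dashboard or via `pnpx wrangler whoami`
CF_ACCOUNT_ID="<your-account-id>"
# The id of the KV namespace created for cf-ai-backend below (the one bound as KV)
KV_NAMESPACE_ID="<id-from-'pnpx wrangler kv namespace create prod'>"
# An API token that can write to Workers KV (assumed scope: Workers KV Storage: Edit)
CF_KV_AUTH_TOKEN="<cloudflare-api-token>"
```

For a deployed worker, the same values can be provided with `pnpx wrangler secret put CF_KV_AUTH_TOKEN` (and likewise for the other two) rather than being committed to `wrangler.toml`. You can double-check the namespace id at any time with `pnpx wrangler kv namespace list`.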
## Deploying -To deploy the web app, run `bun deploy` in the `apps/web` directory. +To deploy the web app, run `pnpm run deploy` in the `apps/web` directory. To deploy the cf-ai-backend module, run `wrangler publish` in the `apps/cf-ai-backend` directory. -To get the extension running, you need to build it first. Run `bun build` in the `apps/extension` directory and then load the extension in chrome. +To get the extension running, you need to build it first. Run `pnpm build` in the `apps/extension` directory and then load the extension in chrome. diff --git a/apps/cf-ai-backend/package.json b/apps/cf-ai-backend/package.json index fee0c0d8..2b83cc93 100644 --- a/apps/cf-ai-backend/package.json +++ b/apps/cf-ai-backend/package.json @@ -12,6 +12,10 @@ }, "license": "MIT", "dependencies": { - "@hono/zod-validator": "^0.2.1" - } + "@hono/zod-validator": "^0.2.1", + "hono": "^4.5.1" + }, + "devDependencies": { + "@cloudflare/workers-types": "^4.20240614.0" + } } diff --git a/apps/cf-ai-backend/src/helper.ts b/apps/cf-ai-backend/src/helper.ts index c54dde9f..2a68879a 100644 --- a/apps/cf-ai-backend/src/helper.ts +++ b/apps/cf-ai-backend/src/helper.ts @@ -1,5 +1,5 @@ import { Context } from "hono"; -import { Env, vectorObj } from "./types"; +import { Env, vectorObj, Chunks } from "./types"; import { CloudflareVectorizeStore } from "@langchain/cloudflare"; import { OpenAIEmbeddings } from "./utils/OpenAIEmbedder"; import { createOpenAI } from "@ai-sdk/openai"; @@ -7,6 +7,7 @@ import { createGoogleGenerativeAI } from "@ai-sdk/google"; import { createAnthropic } from "@ai-sdk/anthropic"; import { z } from "zod"; import { seededRandom } from "./utils/seededRandom"; +import { bulkInsertKv } from "./utils/kvBulkInsert"; export async function initQuery( c: Context<{ Bindings: Env }>, @@ -135,7 +136,7 @@ export async function batchCreateChunksAndEmbeddings({ }: { store: CloudflareVectorizeStore; body: z.infer; - chunks: string[]; + chunks: Chunks; context: Context<{ Bindings: Env }>; }) { //! NOTE that we use #supermemory-web to ensure that @@ -150,15 +151,25 @@ export async function batchCreateChunksAndEmbeddings({ const allIds = await context.env.KV.list({ prefix: uuid }); - let pageContent = ""; // If some chunks for that content already exist, we'll just update the metadata to include // the user. 
if (allIds.keys.length > 0) { const savedVectorIds = allIds.keys.map((key) => key.name); - const vectors = await context.env.VECTORIZE_INDEX.getByIds(savedVectorIds); - + const vectors = []; + //Search in a batch of 20 + for (let i = 0; i < savedVectorIds.length; i += 20) { + const batch = savedVectorIds.slice(i, i + 20); + const batchVectors = await context.env.VECTORIZE_INDEX.getByIds(batch); + vectors.push(...batchVectors); + } + console.log( + vectors.map((vector) => { + return vector.id; + }), + ); // Now, we'll update all vector metadatas with one more userId and all spaceIds const newVectors = vectors.map((vector) => { + console.log(JSON.stringify(vector.metadata)); vector.metadata = { ...vector.metadata, [`user-${body.user}`]: 1, @@ -169,51 +180,187 @@ export async function batchCreateChunksAndEmbeddings({ return acc; }, {}), }; - const content = - vector.metadata.content.toString().split("Content: ")[1] || - vector.metadata.content; - pageContent += `<---chunkId: ${vector.id}\n${content}\n---->`; return vector; }); - await context.env.VECTORIZE_INDEX.upsert(newVectors); - return pageContent; //Return the page content that goes to d1 db + // upsert in batch of 20 + const results = []; + for (let i = 0; i < newVectors.length; i += 20) { + results.push(newVectors.slice(i, i + 20)); + console.log(JSON.stringify(newVectors[1].id)); + } + + await Promise.all( + results.map((result) => { + return context.env.VECTORIZE_INDEX.upsert(result); + }), + ); + return; } - for (let i = 0; i < chunks.length; i++) { - const chunk = chunks[i]; - const chunkId = `${uuid}-${i}`; + switch (chunks.type) { + case "tweet": + { + const commonMetaData = { + type: body.type ?? "tweet", + title: body.title?.slice(0, 50) ?? "", + description: body.description ?? "", + url: body.url, + [sanitizeKey(`user-${body.user}`)]: 1, + }; - const newPageContent = `Title: ${body.title}\nDescription: ${body.description}\nURL: ${body.url}\nContent: ${chunk}`; + const spaceMetadata = body.spaces?.reduce((acc, space) => { + acc[`space-${body.user}-${space}`] = 1; + return acc; + }, {}); - const docs = await store.addDocuments( - [ - { - pageContent: newPageContent, - metadata: { - title: body.title?.slice(0, 50) ?? "", - description: body.description ?? "", - url: body.url, - type: body.type ?? "page", - content: newPageContent, - - [sanitizeKey(`user-${body.user}`)]: 1, - ...body.spaces?.reduce((acc, space) => { - acc[`space-${body.user}-${space}`] = 1; - return acc; - }, {}), - }, - }, - ], + const ids = []; + const preparedDocuments = chunks.chunks + .map((tweet, i) => { + return tweet.chunkedTweet.map((chunk) => { + const id = `${uuid}-${i}`; + ids.push(id); + const { tweetLinks, tweetVids, tweetId, tweetImages } = + tweet.metadata; + return { + pageContent: chunk, + metadata: { + content: chunk, + links: tweetLinks, + videos: tweetVids, + tweetId: tweetId, + tweetImages: tweetImages, + ...commonMetaData, + ...spaceMetadata, + }, + }; + }); + }) + .flat(); + + const docs = await store.addDocuments(preparedDocuments, { + ids: ids, + }); + console.log("these are the doucment ids", ids); + console.log("Docs added:", docs); + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = + context.env; + await bulkInsertKv( + { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, + { chunkIds: ids, urlid: ourID }, + ); + } + break; + case "page": { - ids: [chunkId], - }, - ); + const commonMetaData = { + type: body.type ?? "page", + title: body.title?.slice(0, 50) ?? "", + description: body.description ?? 
"", + url: body.url, + [sanitizeKey(`user-${body.user}`)]: 1, + }; + const spaceMetadata = body.spaces?.reduce((acc, space) => { + acc[`space-${body.user}-${space}`] = 1; + return acc; + }, {}); - console.log("Docs added: ", docs); + const ids = []; + const preparedDocuments = chunks.chunks.map((chunk, i) => { + const id = `${uuid}-${i}`; + ids.push(id); + return { + pageContent: chunk, + metadata: { + content: chunk, + ...commonMetaData, + ...spaceMetadata, + }, + }; + }); - await context.env.KV.put(chunkId, ourID); - pageContent += `<---chunkId: ${chunkId}\n${chunk}\n---->`; + const docs = await store.addDocuments(preparedDocuments, { ids: ids }); + console.log("Docs added:", docs); + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = + context.env; + await bulkInsertKv( + { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, + { chunkIds: ids, urlid: ourID }, + ); + } + break; + case "note": + { + const commonMetaData = { + title: body.title?.slice(0, 50) ?? "", + type: body.type ?? "page", + description: body.description ?? "", + url: body.url, + [sanitizeKey(`user-${body.user}`)]: 1, + }; + const spaceMetadata = body.spaces?.reduce((acc, space) => { + acc[`space-${body.user}-${space}`] = 1; + return acc; + }, {}); + + const ids = []; + const preparedDocuments = chunks.chunks.map((chunk, i) => { + const id = `${uuid}-${i}`; + ids.push(id); + return { + pageContent: chunk, + metadata: { + content: chunk, + ...commonMetaData, + ...spaceMetadata, + }, + }; + }); + + const docs = await store.addDocuments(preparedDocuments, { ids: ids }); + console.log("Docs added:", docs); + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = + context.env; + await bulkInsertKv( + { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, + { chunkIds: ids, urlid: ourID }, + ); + } + break; + case "image": { + const commonMetaData = { + type: body.type ?? "image", + title: body.title, + description: body.description ?? 
"", + url: body.url, + [sanitizeKey(`user-${body.user}`)]: 1, + }; + const spaceMetadata = body.spaces?.reduce((acc, space) => { + acc[`space-${body.user}-${space}`] = 1; + return acc; + }, {}); + + const ids = []; + const preparedDocuments = chunks.chunks.map((chunk, i) => { + const id = `${uuid}-${i}`; + ids.push(id); + return { + pageContent: chunk, + metadata: { + ...commonMetaData, + ...spaceMetadata, + }, + }; + }); + + const docs = await store.addDocuments(preparedDocuments, { ids: ids }); + console.log("Docs added:", docs); + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = context.env; + await bulkInsertKv( + { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, + { chunkIds: ids, urlid: ourID }, + ); + } } - return pageContent; // Return the pageContent that goes to the d1 db + return; } diff --git a/apps/cf-ai-backend/src/index.ts b/apps/cf-ai-backend/src/index.ts index edfe31c2..629ff172 100644 --- a/apps/cf-ai-backend/src/index.ts +++ b/apps/cf-ai-backend/src/index.ts @@ -1,7 +1,15 @@ import { z } from "zod"; import { Hono } from "hono"; import { CoreMessage, generateText, streamText, tool } from "ai"; -import { chatObj, Env, vectorObj } from "./types"; +import { + chatObj, + Chunks, + Env, + ImageChunks, + PageOrNoteChunks, + TweetChunks, + vectorObj, +} from "./types"; import { batchCreateChunksAndEmbeddings, deleteDocument, @@ -15,6 +23,8 @@ import { zValidator } from "@hono/zod-validator"; import chunkText from "./utils/chonker"; import { systemPrompt, template } from "./prompts/prompt1"; import { swaggerUI } from "@hono/swagger-ui"; +import { chunkThread } from "./utils/chunkTweet"; +import { chunkNote, chunkPage } from "./utils/chunkPageOrNotes"; const app = new Hono<{ Bindings: Env }>(); @@ -59,42 +69,42 @@ app.get("/api/health", (c) => { }); app.post("/api/add", zValidator("json", vectorObj), async (c) => { - const body = c.req.valid("json"); + try { + const body = c.req.valid("json"); - const { store } = await initQuery(c); + const { store } = await initQuery(c); - console.log(body.spaces); + console.log(body.spaces); + let chunks: TweetChunks | PageOrNoteChunks; + // remove everything in tags + // const newPageContent = body.pageContent?.replace(/.*?<\/raw>/g, ""); - // remove everything in tags - const newPageContent = body.pageContent?.replace(/.*?<\/raw>/g, ""); + switch (body.type) { + case "tweet": + chunks = chunkThread(body.pageContent); + break; - const chunks = chunkText(newPageContent, 1536); + case "page": + chunks = chunkPage(body.pageContent); + break; - const chunksOf20 = chunks.reduce((acc, chunk, index) => { - if (index % 20 === 0) { - acc.push([chunk]); - } else { - acc[acc.length - 1].push(chunk); + case "note": + chunks = chunkNote(body.pageContent); + break; } - return acc; - }, [] as string[][]); - - const accumChunkedInputs = []; - const promises = chunksOf20.map(async (chunkGroup) => { - const chunkedInput = await batchCreateChunksAndEmbeddings({ + await batchCreateChunksAndEmbeddings({ store, body, - chunks: chunkGroup, + chunks: chunks, context: c, }); - accumChunkedInputs.push(chunkedInput); - }); - - await Promise.all(promises); - - return c.json({ status: "ok", chunkedInput: accumChunkedInputs }); + return c.json({ status: "ok" }); + } catch (error) { + console.error("Error processing request:", error); + return c.json({ status: "error", message: error.message }, 500); + } }); app.post( @@ -147,6 +157,13 @@ app.post( ); const imageDescriptions = await Promise.all(imagePromises); + const chunks: ImageChunks = { + type: "image", 
+ chunks: [ + imageDescriptions, + ...(body.text ? chunkText(body.text, 1536) : []), + ].flat(), + }; await batchCreateChunksAndEmbeddings({ store, @@ -162,10 +179,7 @@ app.post( pageContent: imageDescriptions.join("\n"), title: "Image content from the web", }, - chunks: [ - imageDescriptions, - ...(body.text ? chunkText(body.text, 1536) : []), - ].flat(), + chunks: chunks, context: c, }); @@ -263,7 +277,7 @@ app.post( // This is a "router". this finds out if the user wants to add a document, or chat with the AI to get a response. const routerQuery = await generateText({ model: model, - system: `You are Supermemory chatbot. You can either add a document to the supermemory database, or return a chat response. Based on this query, + system: `You are Supermemory chatbot. You can either add a document to the supermemory database, or return a chat response. Based on this query, You must determine what to do. Basically if it feels like a "question", then you should intiate a chat. If it feels like a "command" or feels like something that could be forwarded to the AI, then you should add a document. You must also extract the "thing" to add and what type of thing it is.`, prompt: `Question from user: ${query}`, @@ -289,7 +303,9 @@ app.post( if ((task as string) === "add") { // addString is the plaintext string that the user wants to add to the database + //chunk the note let addString: string = addContent; + let vectorContent: Chunks = chunkNote(addContent); if (thingToAdd === "page") { // TODO: Sometimes this query hangs, and errors out. we need to do proper error management here. @@ -300,6 +316,7 @@ app.post( }); addString = await response.text(); + vectorContent = chunkPage(addString); } // At this point, we can just go ahead and create the embeddings! @@ -312,7 +329,7 @@ app.post( pageContent: addString, title: `${addString.slice(0, 30)}... 
(Added from chatbot)`, }, - chunks: chunkText(addString, 1536), + chunks: vectorContent, context: c, }); @@ -561,7 +578,7 @@ app.post( const metadata = normalizedData.map((datapoint) => datapoint.metadata); return c.json({ - ids: storedContent, + ids: storedContent.filter(Boolean), metadata, normalizedData, proModeListedQueries, diff --git a/apps/cf-ai-backend/src/types.ts b/apps/cf-ai-backend/src/types.ts index 4db568a1..5ef81f20 100644 --- a/apps/cf-ai-backend/src/types.ts +++ b/apps/cf-ai-backend/src/types.ts @@ -1,5 +1,6 @@ import { sourcesZod } from "@repo/shared-types"; import { z } from "zod"; +import { ThreadTweetData } from "./utils/chunkTweet"; export type Env = { VECTORIZE_INDEX: VectorizeIndex; @@ -7,6 +8,9 @@ export type Env = { SECURITY_KEY: string; OPENAI_API_KEY: string; GOOGLE_AI_API_KEY: string; + CF_KV_AUTH_TOKEN: string; + KV_NAMESPACE_ID: string; + CF_ACCOUNT_ID: string; MY_QUEUE: Queue; KV: KVNamespace; MYBROWSER: unknown; @@ -23,6 +27,32 @@ export interface TweetData { saveToUser: string; } +interface BaseChunks { + type: "tweet" | "page" | "note" | "image"; +} + +export interface TweetChunks extends BaseChunks { + type: "tweet"; + chunks: Array; +} + +export interface PageOrNoteChunks extends BaseChunks { + type: "page" | "note"; + chunks: string[]; +} +export interface ImageChunks extends BaseChunks { + type: "image"; + chunks: string[]; +} + +export type Chunks = TweetChunks | PageOrNoteChunks | ImageChunks; + +export interface KVBulkItem { + key: string; + value: string; + base64: boolean; +} + export const contentObj = z.object({ role: z.string(), parts: z diff --git a/apps/cf-ai-backend/src/utils/chunkPageOrNotes.ts b/apps/cf-ai-backend/src/utils/chunkPageOrNotes.ts new file mode 100644 index 00000000..f04ed0c5 --- /dev/null +++ b/apps/cf-ai-backend/src/utils/chunkPageOrNotes.ts @@ -0,0 +1,13 @@ +import chunkText from "./chonker"; +import { PageOrNoteChunks } from "../types"; +export function chunkPage(pageContent: string): PageOrNoteChunks { + const chunks = chunkText(pageContent, 1536); + + return { type: "page", chunks: chunks }; +} + +export function chunkNote(noteContent: string): PageOrNoteChunks { + const chunks = chunkText(noteContent, 1536); + + return { type: "note", chunks: chunks }; +} diff --git a/apps/cf-ai-backend/src/utils/chunkTweet.ts b/apps/cf-ai-backend/src/utils/chunkTweet.ts new file mode 100644 index 00000000..78f0f261 --- /dev/null +++ b/apps/cf-ai-backend/src/utils/chunkTweet.ts @@ -0,0 +1,65 @@ +import { TweetChunks } from "../types"; +import chunkText from "./chonker"; +import { getRawTweet } from "@repo/shared-types/utils"; + +interface Tweet { + id: string; + text: string; + links: Array; + images: Array; + videos: Array; +} +interface Metadata { + tweetId: string; + tweetLinks: any[]; + tweetVids: any[]; + tweetImages: any[]; +} + +export interface ThreadTweetData { + chunkedTweet: string[]; + metadata: Metadata; +} + +export function chunkThread(threadText: string): TweetChunks { + const thread = JSON.parse(threadText); + if (typeof thread == "string") { + console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER"); + const rawTweet = getRawTweet(thread); + const parsedTweet: any = JSON.parse(rawTweet); + + const chunkedTweet = chunkText(parsedTweet.text, 1536); + const metadata: Metadata = { + tweetId: parsedTweet.id_str, + tweetLinks: parsedTweet.entities.urls.map((url: any) => url.expanded_url), + tweetVids: + parsedTweet.extended_entities?.media + .filter((media: any) => media.type === "video") + .map((media: any) => 
media.video_info!.variants[0].url) || [], + tweetImages: + parsedTweet.extended_entities?.media + .filter((media: any) => media.type === "photo") + .map((media: any) => media.media_url_https!) || [], + }; + + const chunks = [{ chunkedTweet: chunkedTweet, metadata }]; + + return { type: "tweet", chunks }; + } else { + console.log(JSON.stringify(thread)); + const chunkedTweets = thread.map((tweet: Tweet) => { + const chunkedTweet = chunkText(tweet.text, 1536); + + const metadata = { + tweetId: tweet.id, + tweetLinks: tweet.links, + tweetVids: tweet.videos, + tweetImages: tweet.images, + }; + + return { chunkedTweet, metadata }; + }); + + return { type: "tweet", chunks: chunkedTweets }; + } +} diff --git a/apps/cf-ai-backend/src/utils/kvBulkInsert.ts b/apps/cf-ai-backend/src/utils/kvBulkInsert.ts new file mode 100644 index 00000000..62236412 --- /dev/null +++ b/apps/cf-ai-backend/src/utils/kvBulkInsert.ts @@ -0,0 +1,43 @@ +import { KVBulkItem } from "../types"; + +export const bulkInsertKv = async ( + credentials: { + CF_KV_AUTH_TOKEN: string; + KV_NAMESPACE_ID: string; + CF_ACCOUNT_ID: string; + }, + keyData: { + chunkIds: Array; + urlid: string; + }, +) => { + const data: Array = keyData.chunkIds.map((chunkId) => ({ + key: chunkId, + value: keyData.urlid, + base64: false, + })); + + try { + const response = await fetch( + `https://api.cloudflare.com/client/v4/accounts/${credentials.CF_ACCOUNT_ID}/storage/kv/namespaces/${credentials.KV_NAMESPACE_ID}/bulk`, + { + method: "PUT", + headers: { + Authorization: `Bearer ${credentials.CF_KV_AUTH_TOKEN}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(data), + }, + ); + + if (!response.ok) { + throw new Error( + `can't insert bulk to kv because ${response.status} ${response.statusText} ${JSON.stringify(response.body)}`, + ); + } + return await response.json(); + } catch (e) { + //dosomething + throw e; + } +}; diff --git a/apps/extension/content/ContentApp.tsx b/apps/extension/content/ContentApp.tsx index 89a3a635..a510a77c 100644 --- a/apps/extension/content/ContentApp.tsx +++ b/apps/extension/content/ContentApp.tsx @@ -31,400 +31,408 @@ export default function ContentApp({ token: string | undefined; shadowRoot: ShadowRoot; }) { - const [hover, setHover] = useState(false); - - const { toast } = useToast(); - - const [loading, setLoading] = useState(false); - - const [webNote, setWebNote] = useState(""); - - const [importedCount, setImportedCount] = useState(0); - const [isImporting, setIsImporting] = useState(false); - const [importDone, setImportDone] = useState(false); - - const [portalContainer, setPortalContainer] = useState( - null, - ); - const [isPopoverOpen, setIsPopoverOpen] = useState(false); - const [isPopover2Open, setIsPopover2Open] = useState(false); - - const [spacesOptions, setSpacesOptions] = useState< - { id: number; name: string }[] - >([]); - const [selectedSpace, setSelectedSpace] = useState(); - - const [userNotLoggedIn, setUserNotLoggedIn] = useState(false); - - const showLoginToast = async () => { - setUserNotLoggedIn(true); - - const NOSHOW_TOAST = ["accounts.google.com", "supermemory.ai"]; - - const noLoginWarning = await chrome.storage.local.get("noLoginWarning"); - if (Object.keys(noLoginWarning).length > 0) { - return; - } - - if (!NOSHOW_TOAST.includes(window.location.host)) { - const t = toast({ - title: "Please login to supermemory.ai to use this extension.", - action: ( -
- - - -
- ), - }); - } - }; - - useEffect(() => { - document.addEventListener("mousemove", (e) => { - const percentageX = (e.clientX / window.innerWidth) * 100; - const percentageY = (e.clientY / window.innerHeight) * 100; - - if (percentageX > 75 && percentageY > 75) { - setHover(true); - } else { - setHover(false); - } - }); - - const getUserData = () => { - chrome.runtime.sendMessage({ type: "getJwt" }); - }; - - getUserData(); - - chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { - if (request.type === "import-update") { - setIsImporting(true); - setImportedCount(request.importedCount); - } - - if (request.type === "import-done") { - setIsImporting(false); - setImportDone(true); - } - - if (request.type === "supermemory-message") { - toast({ - title: request.message, - }); - } - }); - - const portalDiv = document.createElement("div"); - portalDiv.id = "popover-portal"; - shadowRoot.appendChild(portalDiv); - setPortalContainer(portalDiv); - - return () => { - document.removeEventListener("mousemove", () => {}); - }; - }, []); - - const getSpaces = async () => { - const response = await fetch(`${BACKEND_URL}/api/spaces`, { - headers: { - Authorization: `Bearer ${token}`, - }, - }); - - if (response.status === 401) { - showLoginToast(); - return; - } - - try { - const data = await response.json(); - setSpacesOptions(data.data); - } catch (e) { - console.error( - `Error in supermemory.ai extension: ${e}. Please contact the developer https://x.com/dhravyashah`, - ); - } - }; - - async function sendUrlToAPI(spaces: string[]) { - setLoading(true); - - setTimeout(() => { - setLoading(false); - }, 1500); - - // get the current URL - const url = window.location.href; - - const blacklist: string[] = []; - // check if the URL is blacklisted - if (blacklist.some((blacklisted) => url.includes(blacklisted))) { - return; - } else { - const clone = document.cloneNode(true) as Document; - const article = new Readability(clone).parse(); - - const ogImage = document - .querySelector('meta[property="og:image"]') - ?.getAttribute("content"); - - const favicon = ( - document.querySelector('link[rel="icon"]') as HTMLLinkElement - )?.href; - - setLoading(true); - - setIsPopoverOpen(false); - - await fetch(`${BACKEND_URL}/api/store`, { - method: "POST", - headers: { - Authorization: `Bearer ${token}`, - }, - body: JSON.stringify({ - pageContent: - (webNote ? `Note about this website: ${webNote}\n\n` : "") + - article?.textContent, - url: url + "#supermemory-user-" + Math.random(), - title: article?.title.slice(0, 500), - spaces: spaces, - description: article?.excerpt.slice(0, 250), - ogImage: ogImage?.slice(0, 1000), - image: favicon, - }), - }).then(async (rep) => { - if (rep.status === 401) { - showLoginToast(); - return; - } - - const d = await rep.json(); - - if (rep.status === 200) { - toast({ - title: "Saved to supermemory.ai", - }); - } else { - toast({ - title: `Failed to save to supermemory.ai: ${d.error ?? "Unknown error"}`, - }); - } - setLoading(false); - return rep; - }); - } - } - - if (!shadowRoot || !portalContainer) { - return null; - } - - return ( -
- - - setIsPopoverOpen(!isPopoverOpen)} - > - - - await getSpaces()} asChild> - - - - - - - - {userNotLoggedIn ? ( - <>You need to login to use this extension. - ) : ( -

Add to supermemory.ai

- )} -
-
-
- - {userNotLoggedIn ? ( -
- -
- ) : ( -
- - - -