Skip to content

Commit

Permalink
chore: onboarding bot
Browse files Browse the repository at this point in the history
  • Loading branch information
Keyrxng committed Oct 6, 2024
1 parent 6b05fdb commit 9229e86
Show file tree
Hide file tree
Showing 13 changed files with 465 additions and 10 deletions.
3 changes: 3 additions & 0 deletions .dev.vars.example
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
OPENAI_API_KEY="MY_SECRET"
SUPABASE_URL=""
SUPABASE_KEY=""
VOYAGEAI_API_KEY=""
9 changes: 6 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
"@octokit/rest": "20.1.1",
"@octokit/webhooks": "13.2.7",
"@sinclair/typebox": "0.32.33",
"@supabase/supabase-js": "^2.45.4",
"@ubiquity-dao/ubiquibot-logger": "^1.3.0",
"dotenv": "^16.4.5",
"openai": "^4.63.0",
"typebox-validators": "0.3.5"
"typebox-validators": "0.3.5",
"voyageai": "^0.0.1-5"
},
"devDependencies": {
"@commitlint/cli": "19.3.0",
Expand Down Expand Up @@ -78,5 +80,6 @@
"extends": [
"@commitlint/config-conventional"
]
}
}
},
"packageManager": "[email protected]+sha1.ac34549e6aa8e7ead463a7407e1c7390f61a6610"
}
12 changes: 12 additions & 0 deletions src/adapters/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { SupabaseClient } from "@supabase/supabase-js";
import { Context } from "../types";
import { VoyageAIClient } from "voyageai";
import { Embeddings } from "./supabase/helpers/embeddings";

/**
 * Assembles the adapter registry that is attached to the plugin context.
 *
 * @param supabaseClient - Supabase client passed through to the embeddings helper.
 * @param voyage - Voyage AI client passed through to the embeddings helper.
 * @param context - Plugin context passed through to the embeddings helper.
 * @returns An object exposing the `Embeddings` helper under `supabase.embeddings`.
 */
export function createAdapters(supabaseClient: SupabaseClient, voyage: VoyageAIClient, context: Context) {
  const embeddings = new Embeddings(voyage, supabaseClient, context);
  const supabase = { embeddings };

  return { supabase };
}
115 changes: 115 additions & 0 deletions src/adapters/supabase/helpers/embeddings.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import { SupabaseClient } from "@supabase/supabase-js";
import { Super } from "./supabase";
import { Context } from "../../../types/context";
import { VoyageAIClient } from "voyageai";
import { CommentType, EmbeddingClass, IssueSimilaritySearchResult } from "../../../types/embeddings";

const VECTOR_SIZE = 1024;

/**
* Embeddings class for creating, updating, and deleting embeddings.
*
* Schema is as follows:
* - `source_id` - The unique identifier for the embedding. (e.g. comment node_id, telegram chat_id, etc.)
* - `type` - The type of embedding. (e.g. setup_instructions, dao_info, task, comment). Consider this the category.
* - `plaintext` - The plaintext version of the markdown
* - `embedding` - The embedding vector for the markdown
* - `metadata` - Additional metadata for the embedding. (e.g. author_association, author_id, fileChunkIndex, filePath, isPrivate)
* - `created_at` - The timestamp when the embedding was created
* - `modified_at` - The timestamp when the embedding was last modified
*/
export class Embeddings extends Super {
  private _voyageClient: VoyageAIClient;

  /**
   * @param voyageClient - Voyage AI client used to turn text into embedding vectors.
   * @param supabase - Supabase client forwarded to the `Super` base class.
   * @param context - Plugin context forwarded to the `Super` base class; its `logger` is used for error reporting.
   */
  constructor(voyageClient: VoyageAIClient, supabase: SupabaseClient, context: Context) {
    super(supabase, context);
    this._voyageClient = voyageClient;
  }

  /**
   * Fetches the single `content` row whose `source_id` equals `sourceId`.
   *
   * Errors are logged (except code `PGRST116`, which is deliberately ignored) and never thrown.
   * NOTE(review): `data` is returned as-is, so this presumably resolves to `null` at runtime when
   * the lookup fails or matches nothing, despite the declared return type — confirm with callers.
   */
  async getEmbedding(sourceId: string): Promise<CommentType> {
    const { data, error } = await this.supabase.from("content").select("*").eq("source_id", sourceId).single();
    if (error && error.code !== "PGRST116") {
      this.context.logger.error("Error getting comment", { err: error, sourceId });
    }
    return data;
  }

  /**
   * Fetches every `content` row whose `type` equals `embeddingClass`.
   *
   * @returns The matching rows, or an empty array when the query fails (the error is logged).
   */
  async getEmbeddingsByClass(embeddingClass: EmbeddingClass): Promise<CommentType[]> {
    const { data, error } = await this.supabase.from("content").select("*").eq("type", embeddingClass);
    if (error) {
      this.context.logger.error("Error getting comments", { err: error, embeddingClass });
      return [];
    }
    return data ?? [];
  }

  /**
   * Embeds `markdown` as a query and calls the `find_similar_content` RPC with it.
   *
   * @param markdown - Text to embed and search with.
   * @param threshold - Passed to the RPC as `threshold`.
   * @param currentId - Passed to the RPC as `curr_source_id`.
   * @returns The RPC result, or an empty array when the RPC fails (the error is logged).
   * @throws If embedding the text fails (see `_embedWithVoyage`).
   */
  async findSimilarContent(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[]> {
    const embedding = await this._embedWithVoyage(markdown, "query");
    const { data, error } = await this.supabase.rpc("find_similar_content", {
      curr_source_id: currentId,
      query_embedding: embedding,
      threshold,
    });
    if (error) {
      this.context.logger.error("Error finding similar issues", { err: error, currentId });
      return [];
    }
    return data ?? [];
  }

  /**
   * Compares a single query against all embeddings in the database and returns the most similar embeddings.
   *
   * @param markdown - Text to embed and search with.
   * @param threshold - Passed to the `find_relevant_context` RPC as `threshold`.
   * @returns The RPC result, or an empty array when the RPC fails (the error is logged).
   * @throws If embedding the text fails (see `_embedWithVoyage`).
   */
  async findRelevantContext(markdown: string, threshold: number): Promise<IssueSimilaritySearchResult[]> {
    const embedding = await this._embedWithVoyage(markdown, "query");
    const { data, error } = await this.supabase.rpc("find_relevant_context", {
      query_embedding: embedding,
      threshold,
    });
    if (error) {
      this.context.logger.error("Error finding relevant context", { err: error });
      return [];
    }
    return data ?? [];
  }

  /**
   * Calls the `hybridsearchwithmeta` RPC with a pre-computed query embedding.
   *
   * Only `queryEmbedding` is currently forwarded to the RPC; `contentType`, `metadataKey` and
   * `metadataValue` are accepted for interface stability but are not sent yet.
   *
   * @returns The RPC result, or `null` when the RPC fails (the error is logged).
   */
  async hybridSearchWithMetadata(
    queryEmbedding: number[],
    contentType: string,
    metadataKey?: string,
    metadataValue?: string
  ) {
    const { data, error } = await this.supabase.rpc("hybridsearchwithmeta", {
      query_embedding: queryEmbedding, // The embedding vector of the query
      // content_type: contentType, // The classified content type (e.g., "setup_instructions")
      // metadata_key: metadataKey, // The key to filter the metadata
      // metadata_value: metadataValue // The value to filter the metadata
    });

    if (error) {
      // Report through the context logger, like every other method on this class.
      this.context.logger.error("Error performing hybrid search", { err: error });
      return null;
    }

    return data; // Results from the hybrid search
  }

  /**
   * Produces an embedding vector for `text` via the Voyage client.
   *
   * @param text - Text to embed; `null` yields a zero vector of `VECTOR_SIZE` entries without calling Voyage.
   * @param inputType - Forwarded to Voyage as `inputType`.
   * @returns The first embedding in the response, or an empty array when the response carries none.
   * @throws An `Error` (after logging) when the Voyage request fails.
   */
  async _embedWithVoyage(text: string | null, inputType: "document" | "query"): Promise<number[]> {
    if (text === null) {
      return new Array(VECTOR_SIZE).fill(0);
    }

    try {
      const response = await this._voyageClient.embed({
        input: text,
        model: "voyage-large-2-instruct",
        inputType,
      });
      return (response.data && response.data[0]?.embedding) || [];
    } catch (err) {
      throw new Error(this.context.logger.error("Error embedding comment", { err })?.logMessage.raw);
    }
  }
}

12 changes: 12 additions & 0 deletions src/adapters/supabase/helpers/supabase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { SupabaseClient } from "@supabase/supabase-js";
import { Context } from "../../../types/context";

/**
 * Base class for Supabase-backed helpers: stores the Supabase client and the
 * plugin context so subclasses can reach them as `this.supabase` and `this.context`.
 */
export class Super {
  constructor(
    protected supabase: SupabaseClient,
    protected context: Context
  ) {}
}
41 changes: 39 additions & 2 deletions src/handlers/ask-gpt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import OpenAI from "openai";
import { Context } from "../types";
import { createChatHistory, formatChatHistory } from "../helpers/format-chat-history";
import { recursivelyFetchLinkedIssues } from "../helpers/issue-fetching";
import { addCommentToIssue } from "./add-comment";

export async function askQuestion(context: Context, question: string) {
if (!question) {
Expand All @@ -10,10 +11,46 @@ export async function askQuestion(context: Context, question: string) {

const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({ context });

const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
const embeddings = await fetchEmbeddingsContext(context, question);
let embeddingContext: null | string = null;

if (embeddings.length > 0) {
// TODO: config items for how many embeddings refs to use
// using the top for now
embeddingContext = embeddings[0].content_plaintext;
}

const debugMsg = `
<details> <summary> top ranked embeddings </summary>
\`\`\`json
${JSON.stringify(embeddings, null, 2)}
\`\`\`
</details>`

await addCommentToIssue(context, debugMsg);
const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies, embeddingContext);

const chatDebugMsg = `
<details> <summary> chat history </summary>
\`\`\`json
${JSON.stringify(formattedChat, null, 2)}
\`\`\`
</details>`

await addCommentToIssue(context, chatDebugMsg);
return await askGpt(context, formattedChat);
}

/**
 * Looks up the stored embeddings most relevant to `query`.
 *
 * @param context - Plugin context; `context.adapters.supabase.embeddings` performs the search.
 * @param query - Text to search with.
 * @param threshold - Forwarded to `findRelevantContext`; defaults to the previous hard-coded 0.6.
 * @param limit - Maximum number of results to keep; defaults to the previous hard-coded 3.
 * @returns Up to `limit` results ordered by descending `similarity`.
 */
async function fetchEmbeddingsContext(context: Context, query: string, threshold = 0.6, limit = 3) {
  const {
    adapters: { supabase },
  } = context;

  const matches = await supabase.embeddings.findRelevantContext(query, threshold);

  // Sort a copy so the array handed back by the adapter is left untouched.
  return [...matches].sort((a, b) => b.similarity - a.similarity).slice(0, limit);
}

export async function askGpt(context: Context, formattedChat: string) {
const {
logger,
Expand All @@ -32,7 +69,7 @@ export async function askGpt(context: Context, formattedChat: string) {

const res: OpenAI.Chat.Completions.ChatCompletion = await openAi.chat.completions.create({
messages: createChatHistory(formattedChat),
model: model ?? "o1-mini",
model: "chatgpt-4o-latest",
});

const answer = res.choices[0].message.content;
Expand Down
10 changes: 9 additions & 1 deletion src/helpers/format-chat-history.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { createKey, streamlineComments } from "../handlers/comments";
import { fetchPullRequestDiff, fetchIssue, fetchIssueComments } from "./issue-fetching";
import { splitKey } from "./issue";

export async function formatChatHistory(context: Context, streamlined: Record<string, StreamlinedComment[]>, specAndBodies: Record<string, string>) {
export async function formatChatHistory(context: Context, streamlined: Record<string, StreamlinedComment[]>, specAndBodies: Record<string, string>, embeddingContext: string | null) {
const convoKeys = Object.keys(streamlined);
const specAndBodyKeys = Object.keys(specAndBodies);
const chatHistory: string[] = [];
Expand All @@ -18,6 +18,14 @@ export async function formatChatHistory(context: Context, streamlined: Record<st
chatHistory.push(block);
}

if (embeddingContext) {
chatHistory.push(
createHeader("Embedding Context", "Embedding Context"),
embeddingContext,
createFooter("Embedding Context")
);
}

return Array.from(new Set(chatHistory)).join("");
}

Expand Down
77 changes: 77 additions & 0 deletions src/helpers/query-nlp.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import OpenAI from "openai";
import { Context } from "../types";
import { EmbeddingClass, CommentType } from "../types/embeddings";
import { createAdapters } from "../adapters";
import { createClient } from "@supabase/supabase-js";
import { VoyageAIClient } from "voyageai";
/**
 * Before using the query embedding to find related content, we first
 * classify the query into one of the "EmbeddingClass" types:
 * "setup_instructions" | "dao_info" | "task" | "comment".
 *
 * This lets us narrow the search to the category of information that is
 * relevant to the user's query. We could search the entire embedding bank, but
 * restricting the search to a specific class of embeddings should yield better results.
 */


/**
 * Classifies `query`, embeds it, and runs the hybrid search with the resulting vector.
 *
 * @param context - Plugin context providing the `logger` and the Supabase embeddings adapter.
 * @param query - The user's question.
 * @returns The hybrid search results; an empty array when the search yields nothing usable
 *          (`hybridSearchWithMetadata` resolves to `null` on error).
 */
export async function queryNlp(context: Context, query: string) {
  const {
    logger,
    adapters: { supabase },
  } = context;

  const classification = await zeroShotNlpClassify(context, query);
  const queryEmbedding = await supabase.embeddings._embedWithVoyage(query, "query");

  logger.info(`Classification of query`, { classification });

  // The search resolves to null on failure; normalise so `.length` below cannot throw.
  const embeddings = (await supabase.embeddings.hybridSearchWithMetadata(queryEmbedding, classification)) ?? [];

  logger.info(`Found ${embeddings.length} embeddings for query`, { query, classification });
  logger.info("Embeddings", { embeddings });

  return embeddings;
}

/** The classifications the system prompt below asks the model to choose between. */
const KNOWN_EMBEDDING_CLASSES: readonly EmbeddingClass[] = ["setup_instructions", "dao_info", "task", "comment"];

/**
 * Normalises a raw model reply (case, surrounding whitespace, quotes/punctuation, spaces or
 * hyphens used instead of underscores) and returns the matching class, or `null` when the
 * reply is not one of the known classes.
 */
function parseEmbeddingClass(reply: string | null | undefined): EmbeddingClass | null {
  const normalized = (reply ?? "")
    .trim()
    .toLowerCase()
    .replace(/[\s-]+/g, "_")
    .replace(/[^a-z_]/g, "");

  return KNOWN_EMBEDDING_CLASSES.find((known) => known === normalized) ?? null;
}

/**
 * Asks the chat model to classify `query` into one of the embedding classes.
 *
 * @param context - Plugin context; reads `env.OPENAI_API_KEY`, `config.openAiBaseUrl` and `logger`.
 * @param query - The user's question, sent as the user message.
 * @returns The classification chosen by the model.
 * @throws An `Error` (after logging) when the model's reply is empty or is not a known class,
 *         instead of passing an unchecked string on as an `EmbeddingClass`.
 */
export async function zeroShotNlpClassify(context: Context, query: string) {
  const {
    env: { OPENAI_API_KEY },
    config: { openAiBaseUrl },
    logger,
  } = context;

  const openAi = new OpenAI({
    apiKey: OPENAI_API_KEY,
    ...(openAiBaseUrl && { baseURL: openAiBaseUrl }),
  });

  const sysMsg = `You are developer onboarding assistant, built by Ubiquity DAO and your name is UbiquityOS.
You are designed to help developers onboard to the Ubiquity DAO ecosystem, all queries will pertain to the Ubiquity DAO ecosystem.
You will classify a query and from that classification, we are able to fetch a category of embeddings to use as context for the query.
There are four classifications of user query:
- setup_instructions: This relates directly to questions which seek to understand how to set up a project.
e.g: "How do I setup the kernel?" "How do I start a plugin?"
- dao_info: This relates to questions which seek to understand the Ubiquity DAO ecosystem.
e.g: "What is the Ubiquity DAO?" "What is the Ubiquity DAO mission?"
- task: Tasks are issue specifications, they cover features, bugs, and other tasks that need to be completed.
e.g: "What is issue xyz about?" "How do I fix issue xyz?"
- comment: Comments are user comments on issues, they can be used to provide context to a query.
e.g: "What are the comments on issue xyz?" "What do people think about issue xyz?"
Reply with a one-word classification of the query.
`;

  const res: OpenAI.Chat.Completions.ChatCompletion = await openAi.chat.completions.create({
    messages: [
      {
        role: "system",
        content: sysMsg,
      },
      {
        role: "user",
        content: query,
      },
    ],
    model: "chatgpt-4o-latest",
  });

  // `choices` may be empty and `content` may be null, so read defensively before validating.
  const reply = res.choices[0]?.message.content;
  const classification = parseEmbeddingClass(reply);

  if (classification === null) {
    throw new Error(logger.error("Unrecognized query classification", { query, reply })?.logMessage.raw);
  }

  return classification;
}
14 changes: 12 additions & 2 deletions src/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,16 @@ import { askQuestion } from "./handlers/ask-gpt";
import { addCommentToIssue } from "./handlers/add-comment";
import { LogReturn, Logs } from "@ubiquity-dao/ubiquibot-logger";
import { Env } from "./types/env";
import { createClient } from "@supabase/supabase-js";
import { createAdapters } from "./adapters";
import { VoyageAIClient } from "voyageai";

export async function plugin(inputs: PluginInputs, env: Env) {
const octokit = new Octokit({ auth: inputs.authToken });
const client = createClient(env.SUPABASE_URL, env.SUPABASE_KEY);
const voyage = new VoyageAIClient({
apiKey: env.VOYAGEAI_API_KEY
});

const context: Context = {
eventName: inputs.eventName,
Expand All @@ -16,9 +23,12 @@ export async function plugin(inputs: PluginInputs, env: Env) {
octokit,
env,
logger: new Logs("debug"),
adapters: {} as Awaited<ReturnType<typeof createAdapters>>,
};

return runPlugin(context);
context.adapters = createAdapters(client, voyage, context);

return await runPlugin(context);
}

export async function runPlugin(context: Context) {
Expand All @@ -28,7 +38,7 @@ export async function runPlugin(context: Context) {
} = context;
const question = context.payload.comment.body;

const slugRegex = new RegExp(`@${UBIQUITY_OS_APP_SLUG} `, "gi");
const slugRegex = new RegExp(`^@${UBIQUITY_OS_APP_SLUG} `, "i");

if (!question.match(slugRegex)) {
logger.info("Comment does not mention the app. Skipping.");
Expand Down
2 changes: 2 additions & 0 deletions src/types/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { EmitterWebhookEvent as WebhookEvent, EmitterWebhookEventName as Webhook
import { PluginSettings } from "./plugin-inputs";
import { Logs } from "@ubiquity-dao/ubiquibot-logger";
import { Env } from "./env";
import { createAdapters } from "../adapters";

export type SupportedEventsU = "issue_comment.created";

Expand All @@ -17,4 +18,5 @@ export interface Context<T extends SupportedEventsU = SupportedEventsU, TU exten
config: PluginSettings;
logger: Logs;
env: Env;
adapters: Awaited<ReturnType<typeof createAdapters>>;
}
Loading

0 comments on commit 9229e86

Please sign in to comment.