
Feat/pull precheck #11

Closed
61 commits
61799d0
chore: use default wrangler dev port, init handler
Keyrxng Oct 23, 2024
86e7852
chore: remove .d.ts from manually written type file
Keyrxng Oct 23, 2024
9001723
chore: add new supported events, type handler
Keyrxng Oct 23, 2024
53d9a96
chore: gitignore temp payloads
Keyrxng Oct 23, 2024
af762e6
Merge branch 'development' into feat/pull-precheck
Keyrxng Oct 23, 2024
65a3a36
chore: submitCodeReview handler
Keyrxng Oct 24, 2024
f02dafa
chore: gql for task relations
Keyrxng Oct 24, 2024
f81eca0
chore: create sys msg fn, use array joins, update type name
Keyrxng Oct 24, 2024
89cbd3f
chore: improve sys msg readability, move to own file
Keyrxng Oct 24, 2024
eae71c5
chore: default sys msg and type import
Keyrxng Oct 24, 2024
b28670b
chore: llm-query-output handler
Keyrxng Oct 24, 2024
2d26534
chore: formatting
Keyrxng Oct 24, 2024
2c0325c
feat: basis for pull precheck
Keyrxng Oct 24, 2024
e6586a4
feat: dynamic ground truths
Keyrxng Oct 24, 2024
42934ff
chore: get issue no from payload util
Keyrxng Oct 24, 2024
63ab3e4
chore: precheck handler complete
Keyrxng Oct 24, 2024
638acbf
chore: gql updates, format, target: ESNEXT for regex groups
Keyrxng Oct 24, 2024
00534fa
chore: eslint, remove console.log, type fix
Keyrxng Oct 24, 2024
18f467f
chore: fix tests, type context for comment.created fns
Keyrxng Oct 24, 2024
e24add8
chore: pass single object param
Keyrxng Oct 24, 2024
788403d
chore: cleanup pull-precheck handler
Keyrxng Oct 24, 2024
48956a8
chore: correct Logs
Keyrxng Oct 24, 2024
50eae39
chore: move helper
Keyrxng Oct 24, 2024
b5b418d
chore: owner - repo - issueNo url util
Keyrxng Oct 24, 2024
8922866
chore: one review per day check
Keyrxng Oct 24, 2024
7501862
chore: convert to draft
Keyrxng Oct 24, 2024
b1d5d70
chore: context fallback for missing task spec
Keyrxng Oct 24, 2024
bbefbad
chore: get task spec
Keyrxng Oct 24, 2024
baa8ade
chore: has collaborator converted
Keyrxng Oct 24, 2024
d5359e8
chore: move hardcoded MAX_TOKENS into constants.ts
Keyrxng Oct 24, 2024
258c077
chore: tool handling
Keyrxng Oct 24, 2024
0f8ef70
chore: relocate helper fns
Keyrxng Oct 24, 2024
82cbb1e
chore: use my original agent logic - untested
Keyrxng Oct 24, 2024
e0f2d51
chore: adapters/openai/types and llm tools
Keyrxng Oct 24, 2024
5852dc4
chore: convertPrToDraft refactored for llm tooling
Keyrxng Oct 24, 2024
35e0429
chore: allow null body, format
Keyrxng Oct 31, 2024
228f486
chore: remove github-diff-tool
Keyrxng Oct 31, 2024
841b9a5
chore: return body hash matching, simplify diff fetch
Keyrxng Oct 31, 2024
dafdd09
chore: update logs to capture full final ctx
Keyrxng Oct 31, 2024
98c6839
chore: list normal comment on PR, remove readme from comments
Keyrxng Oct 31, 2024
d727f79
chore: readme block section, fetch only for current issue repo
Keyrxng Oct 31, 2024
0a63c34
chore: askGpt > askLlm
Keyrxng Oct 31, 2024
e2f6fad
chore: return empty array not a throw
Keyrxng Oct 31, 2024
9784ec5
chore: format, ctx window formatting fixes
Keyrxng Oct 31, 2024
e20a2c5
chore: fix tests
Keyrxng Oct 31, 2024
f1e5ba7
chore: sysMsg formatting fix
Keyrxng Oct 31, 2024
e817454
chore: hardcode model token limits
Keyrxng Oct 31, 2024
26f1709
chore: hardcode no language/deps responses
Keyrxng Oct 31, 2024
c5d4beb
chore: readability
Keyrxng Oct 31, 2024
d4b5a90
chore: ignore pr template html hashMatch
Keyrxng Oct 31, 2024
34b7ab8
chore: token handling
Keyrxng Oct 31, 2024
b877526
chore: diff fetch err handling
Keyrxng Oct 31, 2024
97868df
chore: pr parsing
Keyrxng Oct 31, 2024
398e993
chore: type, tests, cspell
Keyrxng Oct 31, 2024
0324dc9
chore: remove jsdoc comments, add helpful comments
Keyrxng Oct 31, 2024
f4332f9
chore: remove unused gql fetch
Keyrxng Oct 31, 2024
a6ffb03
chore: remove exclusion by file ext
Keyrxng Oct 31, 2024
a5221c7
chore: type null, filter null
Keyrxng Oct 31, 2024
2718477
chore: remove unused type
Keyrxng Nov 1, 2024
bf67520
Merge branch 'fix/missing-context' into feat/pull-precheck
Keyrxng Nov 3, 2024
9104945
chore: push old and merge missing ctx fixes
Keyrxng Nov 3, 2024
3 changes: 2 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
@@ -30,7 +30,8 @@
"mixtral",
"nemo",
"Reranking",
"mistralai"
"mistralai",
"Precheck"
],
"dictionaries": ["typescript", "node", "software-terms"],
"import": ["@cspell/dict-typescript/cspell-ext.json", "@cspell/dict-node/cspell-ext.json", "@cspell/dict-software-terms"],
1 change: 1 addition & 0 deletions .gitignore
@@ -16,3 +16,4 @@ cypress/screenshots
script.ts
.wrangler
test-dashboard.md
payloads.json
2 changes: 1 addition & 1 deletion manifest.json
@@ -1,5 +1,5 @@
{
"name": "command-ask",
"description": "A highly context aware organization integrated chatbot",
"ubiquity:listeners": ["issue_comment.created"]
"ubiquity:listeners": ["issue_comment.created", "pull_request.opened", "pull_request.ready_for_review"]
}
1 change: 1 addition & 0 deletions package.json
@@ -28,6 +28,7 @@
],
"dependencies": {
"@mswjs/data": "^0.16.2",
"@octokit/graphql-schema": "^15.25.0",
"@octokit/rest": "20.1.1",
"@octokit/webhooks": "13.2.7",
"@sinclair/typebox": "0.32.33",
37 changes: 23 additions & 14 deletions src/adapters/openai/helpers/completions.ts
@@ -3,7 +3,7 @@ import { Context } from "../../../types";
import { SuperOpenAi } from "./openai";
const MAX_TOKENS = 7000;

export interface CompletionsType {
export interface ResponseFromLlm {
answer: string;
tokenUsage: {
input: number;
@@ -20,14 +20,34 @@ export class Completions extends SuperOpenAi {
this.context = context;
}

private _createSystemMessage(systemMessage: string, additionalContext: string[], localContext: string[], groundTruths: string[], botName: string) {
// safer to use array join than string concatenation
const parts = [
"You Must obey the following ground truths: [",
groundTruths.join(":"),
"]\n",
systemMessage,
"Your name is : ",
botName,
"\n",
"Primary Context: ",
additionalContext.join("\n"),
"\nLocal Context: ",
localContext.join("\n"),
];

return parts.join("\n");
}

async createCompletion(
systemMessage: string,
prompt: string,
model: string = "o1-mini",
additionalContext: string[],
localContext: string[],
groundTruths: string[],
botName: string
): Promise<CompletionsType> {
): Promise<ResponseFromLlm> {
const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
model: model,
messages: [
@@ -36,18 +36,7 @@
content: [
{
type: "text",
text:
"You Must obey the following ground truths: [" +
groundTruths.join(":") +
"]\n" +
"You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.\n\n# Steps\n\n1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.\n2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.\n3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.\n4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.\n5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.\n\n# Output Format\n\n- Concise and coherent responses in paragraphs that directly address the user's question.\n- Incorporate inline code snippets or references from the documentation if relevant.\n\n# Examples\n\n**Example 1**\n\n*Input:*\n- Chat History: \"What was the original reason for moving the LP tokens?\"\n- Corpus Excerpts: \"It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not.\"\n\n*Output:*\n\"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.\n\n# Notes\n\n- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.\n- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information." +
"Your name is : " +
botName +
"\n" +
"Primary Context: " +
additionalContext.join("\n") +
"\nLocal Context: " +
localContext.join("\n"),
text: this._createSystemMessage(systemMessage, additionalContext, localContext, groundTruths, botName),
},
],
},
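The refactor above replaces a long chain of string concatenation with an array join. A minimal standalone sketch of the same pattern (names mirror `_createSystemMessage` from the diff, but this is illustrative, not the plugin's actual module):

```typescript
// Illustrative sketch of the array-join system-message builder from the diff.
// Not the plugin's module; the signature mirrors _createSystemMessage.
function createSystemMessage(
  systemMessage: string,
  additionalContext: string[],
  localContext: string[],
  groundTruths: string[],
  botName: string
): string {
  // An array join reads better and avoids long "+" concatenation chains.
  const parts = [
    "You Must obey the following ground truths: [",
    groundTruths.join(":"),
    "]\n",
    systemMessage,
    "Your name is : ",
    botName,
    "\n",
    "Primary Context: ",
    additionalContext.join("\n"),
    "\nLocal Context: ",
    localContext.join("\n"),
  ];
  return parts.join("\n");
}
```

Note that `parts.join("\n")` inserts a newline between every element, so each labelled section lands on its own line without manual `"\n"` bookkeeping at each boundary.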
32 changes: 32 additions & 0 deletions src/adapters/openai/helpers/prompts.ts
@@ -0,0 +1,32 @@
export const DEFAULT_SYSTEM_MESSAGE = `You are tasked with assisting as a GitHub bot by generating responses based on provided chat history and similar responses, focusing on using available knowledge within the provided corpus, which may contain code, documentation, or incomplete information. Your role is to interpret and use this knowledge effectively to answer user questions.

# Steps

1. **Understand Context**: Review the chat history and any similar provided responses to understand the context.
2. **Extract Relevant Information**: Identify key pieces of information, even if they are incomplete, from the available corpus.
3. **Apply Knowledge**: Use the extracted information and relevant documentation to construct an informed response.
4. **Draft Response**: Compile the gathered insights into a coherent and concise response, ensuring it's clear and directly addresses the user's query.
5. **Review and Refine**: Check for accuracy and completeness, filling any gaps with logical assumptions where necessary.

# Output Format

- Concise and coherent responses in paragraphs that directly address the user's question.
- Incorporate inline code snippets or references from the documentation if relevant.

# Examples

**Example 1**

*Input:*
- Chat History: "What was the original reason for moving the LP tokens?"
- Corpus Excerpts: "It isn't clear to me if we redid the staking yet and if we should migrate. If so, perhaps we should make a new issue instead. We should investigate whether the missing LP tokens issue from the MasterChefV2.1 contract is critical to the decision of migrating or not."

*Output:*
"It was due to missing LP tokens issue from the MasterChefV2.1 Contract.

# Notes

- Ensure the response is crafted from the corpus provided, without introducing information outside of what's available or relevant to the query.
- Consider edge cases where the corpus might lack explicit answers, and justify responses with logical reasoning based on the existing information.`;

export const PULL_PRECHECK_SYSTEM_MESSAGE = `Perform code review using the diff and spec.`;
4 changes: 3 additions & 1 deletion src/handlers/add-comment.ts
@@ -1,3 +1,4 @@
import { getIssueNumberFromPayload } from "../helpers/get-issue-no-from-payload";
import { Context } from "../types/context";

/**
@@ -7,7 +7,8 @@ import { Context } from "../types/context";
*/
export async function addCommentToIssue(context: Context, message: string) {
const { payload } = context;
const issueNumber = payload.issue.number;
const issueNumber = getIssueNumberFromPayload(payload);

try {
await context.octokit.issues.createComment({
owner: payload.repository.owner.login,
9 changes: 6 additions & 3 deletions src/handlers/ask-llm.ts
@@ -1,10 +1,11 @@
import { Context } from "../types";
import { CompletionsType } from "../adapters/openai/helpers/completions";
import { ResponseFromLlm } from "../adapters/openai/helpers/completions";
import { CommentSimilaritySearchResult } from "../adapters/supabase/helpers/comment";
import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
import { recursivelyFetchLinkedIssues } from "../helpers/issue-fetching";
import { formatChatHistory } from "../helpers/format-chat-history";
import { optimizeContext } from "../helpers/issue";
import { DEFAULT_SYSTEM_MESSAGE } from "../adapters/openai/helpers/prompts";

/**
* Asks a question to GPT and returns the response
@@ -13,14 +13,15 @@ import { optimizeContext } from "../helpers/issue";
* @returns The response from GPT
* @throws If no question is provided
*/
export async function askQuestion(context: Context, question: string) {
export async function askQuestion(context: Context<"issue_comment.created">, question: string) {
if (!question) {
throw context.logger.error("No question provided");
}
const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
context,
owner: context.payload.repository.owner.login,
repo: context.payload.repository.name,
issueNum: context.payload.issue.number,
});
const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
context.logger.info(`${formattedChat.join("")}`);
@@ -34,7 +36,7 @@
* @param formattedChat - The formatted chat history to provide context to GPT
* @returns completions - The completions generated by GPT
**/
export async function askGpt(context: Context, question: string, formattedChat: string[]): Promise<CompletionsType> {
export async function askGpt(context: Context, question: string, formattedChat: string[]): Promise<ResponseFromLlm> {
const {
env: { UBIQUITY_OS_APP_NAME },
config: { model, similarityThreshold },
@@ -63,6 +65,7 @@
similarText = similarText.filter((text) => text !== "");
const rerankedText = similarText.length > 0 ? await context.adapters.voyage.reranker.reRankResults(similarText, question) : [];
return context.adapters.openai.completions.createCompletion(
DEFAULT_SYSTEM_MESSAGE,
question,
model,
rerankedText,
21 changes: 3 additions & 18 deletions src/handlers/comment-created-callback.ts
@@ -1,8 +1,7 @@
import { Context, SupportedEvents } from "../types";
import { addCommentToIssue } from "./add-comment";
import { askQuestion } from "./ask-llm";
import { CallbackResult } from "../types/proxy";
import { bubbleUpErrorComment } from "../helpers/errors";
import { askQuestion } from "./ask-llm";
import { handleLlmQueryOutput } from "./llm-query-output";

export async function issueCommentCreatedCallback(
context: Context<"issue_comment.created", SupportedEvents["issue_comment.created"]>
@@ -23,19 +22,5 @@
return { status: 204, reason: logger.info("Comment is empty. Skipping.").logMessage.raw };
}
logger.info(`Asking question: ${question}`);

try {
const response = await askQuestion(context, question);
const { answer, tokenUsage } = response;
if (!answer) {
throw logger.error(`No answer from OpenAI`);
}
logger.info(`Answer: ${answer}`, { tokenUsage });
const tokens = `\n\n<!--\n${JSON.stringify(tokenUsage, null, 2)}\n--!>`;
const commentToPost = answer + tokens;
await addCommentToIssue(context, commentToPost);
return { status: 200, reason: logger.info("Comment posted successfully").logMessage.raw };
} catch (error) {
throw await bubbleUpErrorComment(context, error, false);
}
return await handleLlmQueryOutput(context, await askQuestion(context, question));
}
86 changes: 86 additions & 0 deletions src/handlers/find-ground-truths.ts
@@ -0,0 +1,86 @@
import OpenAI from "openai";
import { Context } from "../types";
import { logger } from "../helpers/errors";

const FIND_GROUND_TRUTHS_SYSTEM_MESSAGE = `Using the input provided, your goal is to produce an array of strings that represent "Ground Truths."
These ground truths are high-level abstractions that encapsulate the key aspects of the task.
They serve to guide and inform our code review model's interpretation of the task by providing clear, concise, and explicit insights.

Each ground truth should:
- Be succinct and easy to understand.
- Directly pertain to the task at hand.
- Focus on essential requirements, behaviors, or assumptions involved in the task.

Example:
Task: Implement a function that adds two numbers.
Ground Truths:
- The function should accept two numerical inputs.
- The function should return the sum of the two inputs.
- Inputs must be validated to ensure they are numbers.

Based on the given task, generate similar ground truths adhering to a maximum of 10.

Return a JSON parsable array of strings representing the ground truths, without comment or directive.`;

function validateGroundTruths(truthsString: string): string[] {
let truths;
try {
truths = JSON.parse(truthsString);
} catch (err) {
throw logger.error("Failed to parse ground truths");
}
if (!Array.isArray(truths)) {
throw logger.error("Ground truths must be an array");
}

if (truths.length > 10) {
throw logger.error("Ground truths must not exceed 10");
}

truths.forEach((truth: string) => {
if (typeof truth !== "string") {
throw logger.error("Each ground truth must be a string");
}
});

return truths;
}

export async function findGroundTruths(context: Context, groundTruthSource: string) {
const {
env: { OPENAI_API_KEY },
config: { openAiBaseUrl, model },
} = context;

const openAi = new OpenAI({
apiKey: OPENAI_API_KEY,
...(openAiBaseUrl && { baseURL: openAiBaseUrl }),
});

const res = await openAi.chat.completions.create({
messages: [
{
role: "system",
content: FIND_GROUND_TRUTHS_SYSTEM_MESSAGE,
},
{
role: "user",
content: groundTruthSource,
},
],
/**
* I've used the config model here but in my opinion,
* we should optimize this for a quicker response which
* means no advanced reasoning models. rfc
*/
model: model,
});

const output = res.choices[0].message.content;

if (!output) {
throw logger.error("Failed to produce a ground truths response");
}

return validateGroundTruths(output);
}
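The `validateGroundTruths` guard above parses the model's raw output and bounds-checks it before use. A self-contained sketch of the same checks, assuming a plain `Error` in place of the plugin's `logger.error` throw:

```typescript
// Standalone re-implementation of the validateGroundTruths guard from the
// diff, assuming plain Errors where the plugin throws via its logger.
function validateGroundTruths(truthsString: string): string[] {
  let truths: unknown;
  try {
    truths = JSON.parse(truthsString);
  } catch {
    throw new Error("Failed to parse ground truths");
  }
  if (!Array.isArray(truths)) {
    throw new Error("Ground truths must be an array");
  }
  if (truths.length > 10) {
    throw new Error("Ground truths must not exceed 10");
  }
  for (const truth of truths) {
    if (typeof truth !== "string") {
      throw new Error("Each ground truth must be a string");
    }
  }
  return truths as string[];
}
```

Validating shape, length, and element type separately gives a distinct error message for each failure mode, which makes it easier to see whether the model ignored the "JSON parsable array of strings" instruction or the ten-item cap.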
22 changes: 22 additions & 0 deletions src/handlers/llm-query-output.ts
@@ -0,0 +1,22 @@
import { ResponseFromLlm } from "../adapters/openai/helpers/completions";
import { bubbleUpErrorComment } from "../helpers/errors";
import { Context } from "../types";
import { CallbackResult } from "../types/proxy";
import { addCommentToIssue } from "./add-comment";

export async function handleLlmQueryOutput(context: Context, llmResponse: ResponseFromLlm): Promise<CallbackResult> {
const { logger } = context;
try {
const { answer, tokenUsage } = llmResponse;
if (!answer) {
throw logger.error(`No answer from OpenAI`);
}
logger.info(`Answer: ${answer}`, { tokenUsage });
const tokens = `\n\n<!--\n${JSON.stringify(tokenUsage, null, 2)}\n--!>`;
Reviewer comment (Member): We have some means to add structured metadata via a method from the SDK
const commentToPost = answer + tokens;
await addCommentToIssue(context, commentToPost);
return { status: 200, reason: logger.info("Comment posted successfully").logMessage.raw };
} catch (error) {
throw await bubbleUpErrorComment(context, error, false);
}
}
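The handler above appends token usage to the posted answer as a hidden comment. A standalone sketch of that step, with `TokenUsage` as a stand-in shape; note that a standard HTML comment terminates with `-->`, whereas the diff writes `--!>`:

```typescript
// Sketch of the metadata-appending step in handleLlmQueryOutput.
// TokenUsage is a stand-in for the shape used in ResponseFromLlm; the
// diff's "--!>" terminator is replaced with the valid "-->" here.
interface TokenUsage {
  input: number;
  output: number;
}

function appendTokenMetadata(answer: string, tokenUsage: TokenUsage): string {
  // Hide token accounting from readers while keeping it greppable in the
  // rendered comment's HTML source.
  const tokens = `\n\n<!--\n${JSON.stringify(tokenUsage, null, 2)}\n-->`;
  return answer + tokens;
}
```

As the reviewer comment on this file notes, a structured-metadata method from the SDK could replace this hand-rolled comment entirely.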