Skip to content

Commit

Permalink
chore: remove jsdoc comments, add helpful comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Keyrxng committed Oct 31, 2024
1 parent 398e993 commit 0324dc9
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 112 deletions.
3 changes: 2 additions & 1 deletion src/adapters/openai/helpers/completions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ export class Completions extends SuperOpenAi {
additionalContext.join("\n"),
].join("\n");

// logger.info(`System message: ${sysMsg}`);
logger.info(`System message: ${sysMsg}`);
logger.info(`Query: ${query}`);

const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
Expand Down Expand Up @@ -165,6 +165,7 @@ export class Completions extends SuperOpenAi {
}

async findTokenLength(prompt: string, additionalContext: string[] = [], localContext: string[] = [], groundTruths: string[] = []): Promise<number> {
  // Combine every context source exactly as it will be sent to the model so the count matches the real payload.
  const combined = [prompt, additionalContext.join("\n"), localContext.join("\n"), groundTruths.join("\n")].join("");
  // disallowedSpecial: new Set() — the prompt may contain a raw diff, so every special token must be encoded as plain text.
  return encode(combined, { disallowedSpecial: new Set() }).length;
}
}
26 changes: 10 additions & 16 deletions src/handlers/ask-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,23 @@ import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/c
import { findGroundTruths } from "./ground-truths/find-ground-truths";
import { bubbleUpErrorComment, logger } from "../helpers/errors";

/**
* Asks a question to GPT and returns the response
* @param context - The context object containing environment and configuration details
* @param question - The question to ask GPT
* @returns The response from GPT
* @throws If no question is provided
*/
export async function askQuestion(context: Context, question: string) {
if (!question) {
throw context.logger.error("No question provided");
throw logger.error("No question provided");
}
// using any links in comments or issue/pr bodies to fetch more context
const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
context,
owner: context.payload.repository.owner.login,
repo: context.payload.repository.name,
});
// build a nicely structure system message containing a streamlined chat history
// includes the current issue, any linked issues, and any linked PRs
const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
// logger.info(`${formattedChat.join("")}`);
logger.info(`${formattedChat.join("")}`);
return await askLlm(context, question, formattedChat);
}

/**
* Asks GPT a question and returns the completions
* @param context - The context object containing environment and configuration details
* @param question - The question to ask GPT
* @param formattedChat - The formatted chat history to provide context to GPT
* @returns completions - The completions generated by GPT
**/
export async function askLlm(context: Context, question: string, formattedChat: string[]): Promise<CompletionsType> {
const {
env: { UBIQUITY_OS_APP_NAME },
Expand All @@ -48,19 +37,24 @@ export async function askLlm(context: Context, question: string, formattedChat:
} = context;

try {
// using db functions to find similar comments and issues
const [similarComments, similarIssues] = await Promise.all([
comment.findSimilarComments(question, 1 - similarityThreshold, ""),
issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
]);

// combine the similar comments and issues into a single array
const similarText = [
...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
];

// filter out any empty strings
formattedChat = formattedChat.filter((text) => text);

// rerank the similar text using voyageai
const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
// gather structural data about the payload repository
const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);

let groundTruths: string[] = [];
Expand Down
1 change: 1 addition & 0 deletions src/handlers/comment-created-callback.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export async function issueCommentCreatedCallback(
}

const metadataString = createStructuredMetadata(
// don't change this header, it's used for tracking
"ubiquity-os-llm-response",
logger.info(`Answer: ${answer}`, {
metadata: {
Expand Down
40 changes: 22 additions & 18 deletions src/helpers/format-chat-history.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export async function formatChatHistory(
streamlined: Record<string, StreamlinedComment[]>,
specAndBodies: Record<string, string>
): Promise<string[]> {
// At this point really we should have all the context we can obtain but we try again just in case
const keys = new Set([...Object.keys(streamlined), ...Object.keys(specAndBodies), createKey(context.payload.issue.html_url)]);
const tokenLimits: TokenLimits = {
modelMaxTokenLimit: context.adapters.openai.completions.getModelMaxTokenLimit(context.config.model),
Expand All @@ -18,11 +19,14 @@ export async function formatChatHistory(
tokensRemaining: 0,
};

// minus the output tokens we have this many tokens to use
// what we start out with
tokenLimits.tokensRemaining = tokenLimits.modelMaxTokenLimit - tokenLimits.maxCompletionTokens;

// careful adding any more API calls here as it's likely to hit the secondary rate limit
const chatHistory = await Promise.all(
// keys are owner/repo/issueNum; so for each issue, we want to create a block
Array.from(keys).map(async (key, i) => {
// if we run out of tokens, we should stop
if (tokenLimits.tokensRemaining < 0) {
logger.error(`Ran out of tokens at block ${i}`);
return "";
Expand All @@ -35,6 +39,7 @@ export async function formatChatHistory(
isCurrentIssue: key === createKey(context.payload.issue.html_url),
tokenLimits,
});
// update the token count
tokenLimits.runningTokenCount = currentTokenCount;
tokenLimits.tokensRemaining = tokenLimits.modelMaxTokenLimit - tokenLimits.maxCompletionTokens - currentTokenCount;
return result;
Expand All @@ -44,6 +49,7 @@ export async function formatChatHistory(
return Array.from(new Set(chatHistory)).filter((x): x is string => !!x);
}

// These give structure and provide the distinction between the different sections of the chat history
function getCorrectHeaderString(prDiff: string | null, isCurrentIssue: boolean, isConvo: boolean) {
const strings = {
convo: {
Expand Down Expand Up @@ -90,15 +96,15 @@ async function createContextBlockSection({
tokenLimits: TokenLimits;
}): Promise<[number, string]> {
let comments = streamlined[key];
if (!comments || comments.length === 0) {
// just in case we try again but we should already have the comments
if (!comments || !comments.length) {
const [owner, repo, number] = splitKey(key);
const { comments: fetchedComments } = await fetchIssueComments({
context,
owner,
repo,
issueNum: parseInt(number),
});

comments = streamlineComments(fetchedComments)[key];
}

Expand All @@ -108,8 +114,11 @@ async function createContextBlockSection({
throw context.logger.error("Issue number is not valid");
}

// Fetch our diff if we have one; this excludes the largest of files to keep within token limits
const { diff } = await fetchPullRequestDiff(context, org, repo, issueNumber, tokenLimits);
// specification or pull request body
let specOrBody = specAndBodies[key];
// we should have it already but just in case
if (!specOrBody) {
specOrBody =
(
Expand All @@ -122,49 +131,43 @@ async function createContextBlockSection({
)?.body || "No specification or body available";
}

const specHeader = getCorrectHeaderString(diff, isCurrentIssue, false);
const blockHeader = getCorrectHeaderString(diff, isCurrentIssue, true);
const specHeader = getCorrectHeaderString(diff, isCurrentIssue, false); //E.g: === Current Task Specification ===
const blockHeader = getCorrectHeaderString(diff, isCurrentIssue, true); //E.g: === Linked Task Conversation ===

// contains the actual spec or body
const specBlock = [createHeader(specHeader, key), createSpecOrBody(specOrBody), createFooter(specHeader, key)];
// contains the conversation
const commentSection = createComment({ issueNumber, repo, org, comments }, specOrBody);

let block;
// if we have a conversation, we should include it
if (commentSection) {
block = [specBlock.join("\n"), createHeader(blockHeader, key), commentSection, createFooter(blockHeader, key)];
} else {
// in this scenario we have no task/PR conversation, just the spec
// No need for empty sections in the chat history
block = [specBlock.join("\n")];
}

// only inject the README if this is the current issue as that's likely most relevant
if (isCurrentIssue) {
const readme = await pullReadmeFromRepoForIssue({ context, owner: org, repo });
// give the readme its own clear section
if (readme) {
const readmeBlock = readme ? [createHeader("README", key), createSpecOrBody(readme), createFooter("README", key)] : [];
block = block.concat(readmeBlock);
}
}

if (!diff) {
// the diff was already encoded etc but we have added more to the block so we need to re-encode
return [await context.adapters.openai.completions.findTokenLength(block.join("")), block.join("\n")];
}

// Build the block with the diff in its own section
const blockWithDiff = [block.join("\n"), createHeader(`Pull Request Diff`, key), diff, createFooter(`Pull Request Diff`, key)];
return [await context.adapters.openai.completions.findTokenLength(blockWithDiff.join("")), blockWithDiff.join("\n")];
}

/**
* Might not need to splice from the formatted window
function removeSections(fullText: string, header: string, footer: string): string {
const regex = new RegExp(`${escapeRegExp(header)}[\\s\\S]*?${escapeRegExp(footer)}`, 'g');
return fullText.replace(regex, '').trim();
}
function escapeRegExp(text: string): string {
return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
*/

function createHeader(content: string, repoString: string) {
return `=== ${content} === ${repoString} ===\n`;
}
Expand All @@ -184,6 +187,7 @@ function createComment(comment: StreamlinedComments, specOrBody: string) {

const seen = new Set<number>();
comment.comments = comment.comments.filter((c) => {
// Do not include the same comment twice or the spec/body
if (seen.has(c.id) || c.body === specOrBody) {
return false;
}
Expand Down
40 changes: 5 additions & 35 deletions src/helpers/issue-fetching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,21 @@ import { dedupeStreamlinedComments, fetchCodeLinkedFromIssue, idIssueFromComment
import { handleIssue, handleSpec, handleSpecAndBodyKeys, throttlePromises } from "./issue-handling";
import { processPullRequestDiff } from "./pull-request-parsing";

/**
* Recursively fetches linked issues and processes them, including fetching comments and specifications.
*
* @param params - The parameters required to fetch the linked issues, including context and other details.
* @returns A promise that resolves to an object containing linked issues, specifications, streamlined comments, and seen issue keys.
*/
export async function recursivelyFetchLinkedIssues(params: FetchParams) {
// take a first run at gathering everything we need and package it up
const { linkedIssues, seen, specAndBodies, streamlinedComments } = await fetchLinkedIssues(params);
// build promises and throttle them; this calls handleSpec which is a recursive function potentially to great depth
const fetchPromises = linkedIssues.map(async (linkedIssue) => await mergeCommentsAndFetchSpec(params, linkedIssue, streamlinedComments, specAndBodies, seen));
await throttlePromises(fetchPromises, 10);
// handle the keys that have been gathered
const linkedIssuesKeys = linkedIssues.map((issue) => createKey(`${issue.owner}/${issue.repo}/${issue.issueNumber}`));
// exhaustive list of unique keys from the first full pass
const specAndBodyKeys = Array.from(new Set([...Object.keys(specAndBodies), ...Object.keys(streamlinedComments), ...linkedIssuesKeys]));
// this fn throttles from within but again, be weary of the rate limit
await handleSpecAndBodyKeys(specAndBodyKeys, params, dedupeStreamlinedComments(streamlinedComments), seen);
return { linkedIssues, specAndBodies, streamlinedComments };
}

/**
* Fetches linked issues recursively and processes them.
*
* @param params - The parameters required to fetch the linked issues, including context and other details.
* @returns A promise that resolves to an object containing linked issues, specifications, streamlined comments, and seen issue keys.
*/
export async function fetchLinkedIssues(params: FetchParams) {
const { comments, issue } = await fetchIssueComments(params);
if (!issue) {
Expand Down Expand Up @@ -100,15 +93,6 @@ export async function fetchLinkedIssues(params: FetchParams) {
return { streamlinedComments, linkedIssues, specAndBodies, seen };
}

/**
* Merges comments and fetches the specification for a linked issue.
*
* @param params - The parameters required to fetch the linked issue, including context and other details.
* @param linkedIssue - The linked issue for which comments and specifications need to be fetched.
* @param streamlinedComments - A record of streamlined comments associated with issues.
* @param specOrBodies - A record of specifications or bodies associated with issues.
* @param seen - A set of issue keys that have already been processed to avoid duplication.
*/
export async function mergeCommentsAndFetchSpec(
params: FetchParams,
linkedIssue: LinkedIssues,
Expand Down Expand Up @@ -148,11 +132,6 @@ export async function fetchPullRequestDiff(context: Context, org: string, repo:
return await processPullRequestDiff(diff, tokenLimits);
}

/**
* Fetches an issue from the GitHub API.
* @param params - Context
* @returns A promise that resolves to an issue object or null if an error occurs.
*/
export async function fetchIssue(params: FetchParams): Promise<Issue | null> {
const { octokit, payload, logger } = params.context;
const { issueNum, owner, repo } = params;
Expand Down Expand Up @@ -227,15 +206,6 @@ export async function fetchIssueComments(params: FetchParams) {
};
}

/**
* Fetches and handles an issue based on the provided key and parameters.
*
* @param key - The unique key representing the issue in the format "owner/repo/issueNumber".
* @param params - The parameters required to fetch the issue, including context and other details.
* @param streamlinedComments - A record of streamlined comments associated with issues.
* @param seen - A set of issue keys that have already been processed to avoid duplication.
* @returns A promise that resolves to an array of streamlined comments for the specified issue.
*/
export async function fetchAndHandleIssue(
key: string,
params: FetchParams,
Expand Down
42 changes: 1 addition & 41 deletions src/helpers/issue-handling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,6 @@ import { StreamlinedComment } from "../types/llm";
import { idIssueFromComment, mergeStreamlinedComments, splitKey } from "./issue";
import { fetchLinkedIssues, fetchIssue, fetchAndHandleIssue, mergeCommentsAndFetchSpec } from "./issue-fetching";

/**
* Handles the processing of an issue.
*
* @param params - The parameters required to fetch and handle issues.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @param alreadySeen - A set of keys that have already been processed to avoid duplication.
* @returns A promise that resolves when the issue has been handled.
*/
export async function handleIssue(params: FetchParams, streamlinedComments: Record<string, StreamlinedComment[]>, alreadySeen: Set<string>) {
if (alreadySeen.has(createKey(`${params.owner}/${params.repo}/${params.issueNum}`))) {
return;
Expand All @@ -22,17 +14,6 @@ export async function handleIssue(params: FetchParams, streamlinedComments: Reco
return mergeStreamlinedComments(streamlinedComments, streamlined);
}

/**
* Handles the processing of a specification or body text.
*
* @param params - The parameters required to fetch and handle issues.
* @param specOrBody - The specification or body text to be processed.
* @param specAndBodies - A record of specifications and bodies indexed by keys.
* @param key - The key associated with the current specification or body.
* @param seen - A set of keys that have already been processed to avoid duplication.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @returns A promise that resolves to the updated record of specifications and bodies.
*/
export async function handleSpec(
params: FetchParams,
specOrBody: string,
Expand Down Expand Up @@ -73,14 +54,6 @@ export async function handleSpec(
return specAndBodies;
}

/**
* Handles the processing of a comment.
*
* @param params - The parameters required to fetch and handle issues.
* @param comment - The comment to be processed.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @param seen - A set of keys that have already been processed to avoid duplication.
*/
export async function handleComment(
params: FetchParams,
comment: StreamlinedComment,
Expand All @@ -100,15 +73,8 @@ export async function handleComment(
}
}

/**
* Handles the processing of specification and body keys.
*
* @param keys - An array of keys representing issues or comments to be processed.
* @param params - The parameters required to fetch and handle issues.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @param seen - A set of keys that have already been processed to avoid duplication.
*/
export async function handleSpecAndBodyKeys(keys: string[], params: FetchParams, streamlinedComments: Record<string, StreamlinedComment[]>, seen: Set<string>) {
// Make one last sweep just to be sure we have everything
const commentProcessingPromises = keys.map(async (key) => {
let comments = streamlinedComments[key];
if (!comments || comments.length === 0) {
Expand All @@ -122,12 +88,6 @@ export async function handleSpecAndBodyKeys(keys: string[], params: FetchParams,
await throttlePromises(commentProcessingPromises, 10);
}

/**
* Throttles the execution of promises to ensure that no more than the specified limit are running concurrently.
*
* @param promises - An array of promises to be executed.
* @param limit - The maximum number of promises to run concurrently.
*/
export async function throttlePromises(promises: Promise<void>[], limit: number) {
const executing: Promise<void>[] = [];
for (const promise of promises) {
Expand Down
Loading

0 comments on commit 0324dc9

Please sign in to comment.