Skip to content

Commit

Permalink
chore: remove jsdoc comments, add helpful comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Keyrxng committed Oct 31, 2024
1 parent 398e993 commit 0324dc9
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 112 deletions.
3 changes: 2 additions & 1 deletion src/adapters/openai/helpers/completions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ export class Completions extends SuperOpenAi {
additionalContext.join("\n"),
].join("\n");

// logger.info(`System message: ${sysMsg}`);
logger.info(`System message: ${sysMsg}`);
logger.info(`Query: ${query}`);

const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
Expand Down Expand Up @@ -165,6 +165,7 @@ export class Completions extends SuperOpenAi {
}

async findTokenLength(prompt: string, additionalContext: string[] = [], localContext: string[] = [], groundTruths: string[] = []): Promise<number> {
  // Combine every context source exactly as it will be sent to the model so the count matches the real payload.
  const combined = [prompt, additionalContext.join("\n"), localContext.join("\n"), groundTruths.join("\n")].join("");
  // disallowedSpecial: new Set() — the prompt may contain a raw diff, so every special token must be encoded as plain text.
  return encode(combined, { disallowedSpecial: new Set() }).length;
}
}
26 changes: 10 additions & 16 deletions src/handlers/ask-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,23 @@ import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/c
import { findGroundTruths } from "./ground-truths/find-ground-truths";
import { bubbleUpErrorComment, logger } from "../helpers/errors";

/**
* Asks a question to GPT and returns the response
* @param context - The context object containing environment and configuration details
* @param question - The question to ask GPT
* @returns The response from GPT
* @throws If no question is provided
*/
export async function askQuestion(context: Context, question: string) {
if (!question) {
throw context.logger.error("No question provided");
throw logger.error("No question provided");
}
// using any links in comments or issue/pr bodies to fetch more context
const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
context,
owner: context.payload.repository.owner.login,
repo: context.payload.repository.name,
});
// build a nicely structure system message containing a streamlined chat history
// includes the current issue, any linked issues, and any linked PRs
const formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
// logger.info(`${formattedChat.join("")}`);
logger.info(`${formattedChat.join("")}`);
return await askLlm(context, question, formattedChat);
}

/**
* Asks GPT a question and returns the completions
* @param context - The context object containing environment and configuration details
* @param question - The question to ask GPT
* @param formattedChat - The formatted chat history to provide context to GPT
* @returns completions - The completions generated by GPT
**/
export async function askLlm(context: Context, question: string, formattedChat: string[]): Promise<CompletionsType> {
const {
env: { UBIQUITY_OS_APP_NAME },
Expand All @@ -48,19 +37,24 @@ export async function askLlm(context: Context, question: string, formattedChat:
} = context;

try {
// using db functions to find similar comments and issues
const [similarComments, similarIssues] = await Promise.all([
comment.findSimilarComments(question, 1 - similarityThreshold, ""),
issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
]);

// combine the similar comments and issues into a single array
const similarText = [
...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) || []),
...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) || []),
];

// filter out any empty strings
formattedChat = formattedChat.filter((text) => text);

// rerank the similar text using voyageai
const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
// gather structural data about the payload repository
const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);

let groundTruths: string[] = [];
Expand Down
1 change: 1 addition & 0 deletions src/handlers/comment-created-callback.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export async function issueCommentCreatedCallback(
}

const metadataString = createStructuredMetadata(
// don't change this header, it's used for tracking
"ubiquity-os-llm-response",
logger.info(`Answer: ${answer}`, {
metadata: {
Expand Down
40 changes: 22 additions & 18 deletions src/helpers/format-chat-history.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export async function formatChatHistory(
streamlined: Record<string, StreamlinedComment[]>,
specAndBodies: Record<string, string>
): Promise<string[]> {
// At this point really we should have all the context we can obtain but we try again just in case
const keys = new Set([...Object.keys(streamlined), ...Object.keys(specAndBodies), createKey(context.payload.issue.html_url)]);
const tokenLimits: TokenLimits = {
modelMaxTokenLimit: context.adapters.openai.completions.getModelMaxTokenLimit(context.config.model),
Expand All @@ -18,11 +19,14 @@ export async function formatChatHistory(
tokensRemaining: 0,
};

// minus the output tokens we have this many tokens to use
// what we start out with
tokenLimits.tokensRemaining = tokenLimits.modelMaxTokenLimit - tokenLimits.maxCompletionTokens;

// careful adding any more API calls here as it's likely to hit the secondary rate limit
const chatHistory = await Promise.all(
// keys are owner/repo/issueNum; so for each issue, we want to create a block
Array.from(keys).map(async (key, i) => {
// if we run out of tokens, we should stop
if (tokenLimits.tokensRemaining < 0) {
logger.error(`Ran out of tokens at block ${i}`);
return "";
Expand All @@ -35,6 +39,7 @@ export async function formatChatHistory(
isCurrentIssue: key === createKey(context.payload.issue.html_url),
tokenLimits,
});
// update the token count
tokenLimits.runningTokenCount = currentTokenCount;
tokenLimits.tokensRemaining = tokenLimits.modelMaxTokenLimit - tokenLimits.maxCompletionTokens - currentTokenCount;
return result;
Expand All @@ -44,6 +49,7 @@ export async function formatChatHistory(
return Array.from(new Set(chatHistory)).filter((x): x is string => !!x);
}

// These give structure and provide the distinction between the different sections of the chat history
function getCorrectHeaderString(prDiff: string | null, isCurrentIssue: boolean, isConvo: boolean) {
const strings = {
convo: {
Expand Down Expand Up @@ -90,15 +96,15 @@ async function createContextBlockSection({
tokenLimits: TokenLimits;
}): Promise<[number, string]> {
let comments = streamlined[key];
if (!comments || comments.length === 0) {
// just in case we try again but we should already have the comments
if (!comments || !comments.length) {
const [owner, repo, number] = splitKey(key);
const { comments: fetchedComments } = await fetchIssueComments({
context,
owner,
repo,
issueNum: parseInt(number),
});

comments = streamlineComments(fetchedComments)[key];
}

Expand All @@ -108,8 +114,11 @@ async function createContextBlockSection({
throw context.logger.error("Issue number is not valid");
}

// Fetch our diff if we have one; this excludes the largest of files to keep within token limits
const { diff } = await fetchPullRequestDiff(context, org, repo, issueNumber, tokenLimits);
// specification or pull request body
let specOrBody = specAndBodies[key];
// we should have it already but just in case
if (!specOrBody) {
specOrBody =
(
Expand All @@ -122,49 +131,43 @@ async function createContextBlockSection({
)?.body || "No specification or body available";
}

const specHeader = getCorrectHeaderString(diff, isCurrentIssue, false);
const blockHeader = getCorrectHeaderString(diff, isCurrentIssue, true);
const specHeader = getCorrectHeaderString(diff, isCurrentIssue, false); //E.g: === Current Task Specification ===
const blockHeader = getCorrectHeaderString(diff, isCurrentIssue, true); //E.g: === Linked Task Conversation ===

// contains the actual spec or body
const specBlock = [createHeader(specHeader, key), createSpecOrBody(specOrBody), createFooter(specHeader, key)];
// contains the conversation
const commentSection = createComment({ issueNumber, repo, org, comments }, specOrBody);

let block;
// if we have a conversation, we should include it
if (commentSection) {
block = [specBlock.join("\n"), createHeader(blockHeader, key), commentSection, createFooter(blockHeader, key)];
} else {
// in this scenario we have no task/PR conversation, just the spec
// No need for empty sections in the chat history
block = [specBlock.join("\n")];
}

// only inject the README if this is the current issue as that's likely most relevant
if (isCurrentIssue) {
const readme = await pullReadmeFromRepoForIssue({ context, owner: org, repo });
// give the readme its own clear section
if (readme) {
const readmeBlock = readme ? [createHeader("README", key), createSpecOrBody(readme), createFooter("README", key)] : [];
block = block.concat(readmeBlock);
}
}

if (!diff) {
// the diff was already encoded etc but we have added more to the block so we need to re-encode
return [await context.adapters.openai.completions.findTokenLength(block.join("")), block.join("\n")];
}

// Build the block with the diff in its own section
const blockWithDiff = [block.join("\n"), createHeader(`Pull Request Diff`, key), diff, createFooter(`Pull Request Diff`, key)];
return [await context.adapters.openai.completions.findTokenLength(blockWithDiff.join("")), blockWithDiff.join("\n")];
}

/**
* Might not need to splice from the formatted window
function removeSections(fullText: string, header: string, footer: string): string {
const regex = new RegExp(`${escapeRegExp(header)}[\\s\\S]*?${escapeRegExp(footer)}`, 'g');
return fullText.replace(regex, '').trim();
}
function escapeRegExp(text: string): string {
return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
*/

function createHeader(content: string, repoString: string) {
return `=== ${content} === ${repoString} ===\n`;
}
Expand All @@ -184,6 +187,7 @@ function createComment(comment: StreamlinedComments, specOrBody: string) {

const seen = new Set<number>();
comment.comments = comment.comments.filter((c) => {
// Do not include the same comment twice or the spec/body
if (seen.has(c.id) || c.body === specOrBody) {
return false;
}
Expand Down
40 changes: 5 additions & 35 deletions src/helpers/issue-fetching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,21 @@ import { dedupeStreamlinedComments, fetchCodeLinkedFromIssue, idIssueFromComment
import { handleIssue, handleSpec, handleSpecAndBodyKeys, throttlePromises } from "./issue-handling";
import { processPullRequestDiff } from "./pull-request-parsing";

/**
* Recursively fetches linked issues and processes them, including fetching comments and specifications.
*
* @param params - The parameters required to fetch the linked issues, including context and other details.
* @returns A promise that resolves to an object containing linked issues, specifications, streamlined comments, and seen issue keys.
*/
export async function recursivelyFetchLinkedIssues(params: FetchParams) {
// take a first run at gathering everything we need and package it up
const { linkedIssues, seen, specAndBodies, streamlinedComments } = await fetchLinkedIssues(params);
// build promises and throttle them; this calls handleSpec which is a recursive function potentially to great depth
const fetchPromises = linkedIssues.map(async (linkedIssue) => await mergeCommentsAndFetchSpec(params, linkedIssue, streamlinedComments, specAndBodies, seen));
await throttlePromises(fetchPromises, 10);
// handle the keys that have been gathered
const linkedIssuesKeys = linkedIssues.map((issue) => createKey(`${issue.owner}/${issue.repo}/${issue.issueNumber}`));
// exhaustive list of unique keys from the first full pass
const specAndBodyKeys = Array.from(new Set([...Object.keys(specAndBodies), ...Object.keys(streamlinedComments), ...linkedIssuesKeys]));
// this fn throttles from within but again, be weary of the rate limit
await handleSpecAndBodyKeys(specAndBodyKeys, params, dedupeStreamlinedComments(streamlinedComments), seen);
return { linkedIssues, specAndBodies, streamlinedComments };
}

/**
* Fetches linked issues recursively and processes them.
*
* @param params - The parameters required to fetch the linked issues, including context and other details.
* @returns A promise that resolves to an object containing linked issues, specifications, streamlined comments, and seen issue keys.
*/
export async function fetchLinkedIssues(params: FetchParams) {
const { comments, issue } = await fetchIssueComments(params);
if (!issue) {
Expand Down Expand Up @@ -100,15 +93,6 @@ export async function fetchLinkedIssues(params: FetchParams) {
return { streamlinedComments, linkedIssues, specAndBodies, seen };
}

/**
* Merges comments and fetches the specification for a linked issue.
*
* @param params - The parameters required to fetch the linked issue, including context and other details.
* @param linkedIssue - The linked issue for which comments and specifications need to be fetched.
* @param streamlinedComments - A record of streamlined comments associated with issues.
* @param specOrBodies - A record of specifications or bodies associated with issues.
* @param seen - A set of issue keys that have already been processed to avoid duplication.
*/
export async function mergeCommentsAndFetchSpec(
params: FetchParams,
linkedIssue: LinkedIssues,
Expand Down Expand Up @@ -148,11 +132,6 @@ export async function fetchPullRequestDiff(context: Context, org: string, repo:
return await processPullRequestDiff(diff, tokenLimits);
}

/**
* Fetches an issue from the GitHub API.
* @param params - Context
* @returns A promise that resolves to an issue object or null if an error occurs.
*/
export async function fetchIssue(params: FetchParams): Promise<Issue | null> {
const { octokit, payload, logger } = params.context;
const { issueNum, owner, repo } = params;
Expand Down Expand Up @@ -227,15 +206,6 @@ export async function fetchIssueComments(params: FetchParams) {
};
}

/**
* Fetches and handles an issue based on the provided key and parameters.
*
* @param key - The unique key representing the issue in the format "owner/repo/issueNumber".
* @param params - The parameters required to fetch the issue, including context and other details.
* @param streamlinedComments - A record of streamlined comments associated with issues.
* @param seen - A set of issue keys that have already been processed to avoid duplication.
* @returns A promise that resolves to an array of streamlined comments for the specified issue.
*/
export async function fetchAndHandleIssue(
key: string,
params: FetchParams,
Expand Down
42 changes: 1 addition & 41 deletions src/helpers/issue-handling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,6 @@ import { StreamlinedComment } from "../types/llm";
import { idIssueFromComment, mergeStreamlinedComments, splitKey } from "./issue";
import { fetchLinkedIssues, fetchIssue, fetchAndHandleIssue, mergeCommentsAndFetchSpec } from "./issue-fetching";

/**
* Handles the processing of an issue.
*
* @param params - The parameters required to fetch and handle issues.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @param alreadySeen - A set of keys that have already been processed to avoid duplication.
* @returns A promise that resolves when the issue has been handled.
*/
export async function handleIssue(params: FetchParams, streamlinedComments: Record<string, StreamlinedComment[]>, alreadySeen: Set<string>) {
if (alreadySeen.has(createKey(`${params.owner}/${params.repo}/${params.issueNum}`))) {
return;
Expand All @@ -22,17 +14,6 @@ export async function handleIssue(params: FetchParams, streamlinedComments: Reco
return mergeStreamlinedComments(streamlinedComments, streamlined);
}

/**
* Handles the processing of a specification or body text.
*
* @param params - The parameters required to fetch and handle issues.
* @param specOrBody - The specification or body text to be processed.
* @param specAndBodies - A record of specifications and bodies indexed by keys.
* @param key - The key associated with the current specification or body.
* @param seen - A set of keys that have already been processed to avoid duplication.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @returns A promise that resolves to the updated record of specifications and bodies.
*/
export async function handleSpec(
params: FetchParams,
specOrBody: string,
Expand Down Expand Up @@ -73,14 +54,6 @@ export async function handleSpec(
return specAndBodies;
}

/**
* Handles the processing of a comment.
*
* @param params - The parameters required to fetch and handle issues.
* @param comment - The comment to be processed.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @param seen - A set of keys that have already been processed to avoid duplication.
*/
export async function handleComment(
params: FetchParams,
comment: StreamlinedComment,
Expand All @@ -100,15 +73,8 @@ export async function handleComment(
}
}

/**
* Handles the processing of specification and body keys.
*
* @param keys - An array of keys representing issues or comments to be processed.
* @param params - The parameters required to fetch and handle issues.
* @param streamlinedComments - A record of streamlined comments indexed by keys.
* @param seen - A set of keys that have already been processed to avoid duplication.
*/
export async function handleSpecAndBodyKeys(keys: string[], params: FetchParams, streamlinedComments: Record<string, StreamlinedComment[]>, seen: Set<string>) {
// Make one last sweep just to be sure we have everything
const commentProcessingPromises = keys.map(async (key) => {
let comments = streamlinedComments[key];
if (!comments || comments.length === 0) {
Expand All @@ -122,12 +88,6 @@ export async function handleSpecAndBodyKeys(keys: string[], params: FetchParams,
await throttlePromises(commentProcessingPromises, 10);
}

/**
* Throttles the execution of promises to ensure that no more than the specified limit are running concurrently.
*
* @param promises - An array of promises to be executed.
* @param limit - The maximum number of promises to run concurrently.
*/
export async function throttlePromises(promises: Promise<void>[], limit: number) {
const executing: Promise<void>[] = [];
for (const promise of promises) {
Expand Down
Loading

0 comments on commit 0324dc9

Please sign in to comment.