Skip to content

Commit

Permalink
Merge pull request ubiquity-os-marketplace#21 from sshivaditya2019/development
Browse files Browse the repository at this point in the history
  • Loading branch information
0x4007 authored Oct 28, 2024
2 parents 2a1e15b + 30f876c commit ec32970
Show file tree
Hide file tree
Showing 10 changed files with 567 additions and 98 deletions.
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"knip-ci": "knip --no-exit-code --reporter json --config .github/knip.ts",
"prepare": "husky install",
"test": "jest --setupFiles dotenv/config --coverage",
"worker": "wrangler dev --env dev --port 4000"
"worker": "wrangler dev --env dev --port 5000"
},
"keywords": [
"typescript",
Expand All @@ -34,6 +34,8 @@
"@supabase/supabase-js": "^2.45.4",
"@ubiquity-dao/ubiquibot-logger": "^1.3.0",
"dotenv": "^16.4.5",
"github-diff-tool": "^1.0.6",
"gpt-tokenizer": "^2.5.1",
"openai": "^4.63.0",
"typebox-validators": "0.3.5",
"voyageai": "^0.0.1-5"
Expand Down
19 changes: 12 additions & 7 deletions src/adapters/openai/helpers/completions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import OpenAI from "openai";
import { Context } from "../../../types";
import { SuperOpenAi } from "./openai";
import { CompletionsModelHelper, ModelApplications } from "../../../types/llm";
const MAX_TOKENS = 7000;
import { encode } from "gpt-tokenizer";

export interface CompletionsType {
answer: string;
Expand All @@ -28,7 +28,8 @@ export class Completions extends SuperOpenAi {
additionalContext: string[],
localContext: string[],
groundTruths: string[],
botName: string
botName: string,
maxTokens: number
): Promise<CompletionsType> {
const res: OpenAI.Chat.Completions.ChatCompletion = await this.client.chat.completions.create({
model: model,
Expand All @@ -46,10 +47,10 @@ export class Completions extends SuperOpenAi {
"Your name is : " +
botName +
"\n" +
"Primary Context: " +
additionalContext.join("\n") +
"\nLocal Context: " +
localContext.join("\n"),
"Main Context (Provide additional precedence in terms of information): " +
localContext.join("\n") +
"Secondary Context: " +
additionalContext.join("\n"),
},
],
},
Expand All @@ -64,7 +65,7 @@ export class Completions extends SuperOpenAi {
},
],
temperature: 0.2,
max_tokens: MAX_TOKENS,
max_tokens: maxTokens,
top_p: 0.5,
frequency_penalty: 0,
presence_penalty: 0,
Expand Down Expand Up @@ -117,4 +118,8 @@ export class Completions extends SuperOpenAi {

return res.choices[0].message.content;
}

/**
 * Counts the tokens of a prompt plus its supporting context using the
 * gpt-tokenizer encoder (each context array is joined with newlines and
 * appended to the prompt before encoding).
 *
 * @param prompt - The question/prompt text.
 * @param additionalContext - Extra context strings (e.g. reranked similar text).
 * @param localContext - Local context strings (e.g. formatted chat history).
 * @param groundTruths - Ground-truth statements to include in the count.
 * @returns Total token count of the concatenated text.
 */
async findTokenLength(prompt: string, additionalContext: string[] = [], localContext: string[] = [], groundTruths: string[] = []): Promise<number> {
  // Fold each context array onto the prompt in the same order as before:
  // prompt + additionalContext + localContext + groundTruths.
  const combinedText = [additionalContext, localContext, groundTruths].reduce((text, part) => text + part.join("\n"), prompt);
  return encode(combinedText).length;
}
}
24 changes: 14 additions & 10 deletions src/handlers/ask-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import { CommentSimilaritySearchResult } from "../adapters/supabase/helpers/comm
import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
import { recursivelyFetchLinkedIssues } from "../helpers/issue-fetching";
import { formatChatHistory } from "../helpers/format-chat-history";
import { optimizeContext } from "../helpers/issue";
import { fetchRepoDependencies, fetchRepoLanguageStats } from "./ground-truths/chat-bot";
import { findGroundTruths } from "./ground-truths/find-ground-truths";

Expand Down Expand Up @@ -39,7 +38,7 @@ export async function askQuestion(context: Context, question: string) {
export async function askGpt(context: Context, question: string, formattedChat: string[]): Promise<CompletionsType> {
const {
env: { UBIQUITY_OS_APP_NAME },
config: { model, similarityThreshold },
config: { model, similarityThreshold, maxTokens },
} = context;
let similarComments: CommentSimilaritySearchResult[] = [];
let similarIssues: IssueSimilaritySearchResult[] = [];
Expand All @@ -58,20 +57,25 @@ export async function askGpt(context: Context, question: string, formattedChat:
// Remove Null Results (Private Comments)
similarText = similarText.filter((text) => text !== null);
formattedChat = formattedChat.filter((text) => text !== null);
// Optimize the context
formattedChat = optimizeContext(formattedChat);
// ReRank the results based on the question
// const reRankedChat = formattedChat.length > 0 ? await context.adapters.voyage.reranker.reRankResults(formattedChat.filter(text => text !== ""), question, 300) : [];
similarText = similarText.filter((text) => text !== "");
const rerankedText = similarText.length > 0 ? await context.adapters.voyage.reranker.reRankResults(similarText, question) : [];

const languages = await fetchRepoLanguageStats(context);
const { dependencies, devDependencies } = await fetchRepoDependencies(context);
let dependencies = {};
let devDependencies = {};
try {
const deps = await fetchRepoDependencies(context);
dependencies = deps.dependencies;
devDependencies = deps.devDependencies;
} catch (error) {
context.logger.error(`Unable to Fetch Dependencies: ${(error as Error).message}`);
}
const groundTruths = await findGroundTruths(context, "chat-bot", {
languages,
dependencies,
devDependencies,
});

return context.adapters.openai.completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME);
//Calculate the current context size in tokens
const numTokens = await context.adapters.openai.completions.findTokenLength(question, rerankedText, formattedChat, groundTruths);
context.logger.info(`Number of tokens: ${numTokens}`);
return context.adapters.openai.completions.createCompletion(question, model, rerankedText, formattedChat, groundTruths, UBIQUITY_OS_APP_NAME, maxTokens);
}
34 changes: 26 additions & 8 deletions src/helpers/format-chat-history.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Context } from "../types";
import { StreamlinedComment, StreamlinedComments } from "../types/llm";
import { createKey, streamlineComments } from "../handlers/comments";
import { fetchPullRequestDiff, fetchIssue, fetchIssueComments } from "./issue-fetching";
import { fetchPullRequestDiff, fetchIssue, fetchIssueComments, fetchLinkedPullRequests } from "./issue-fetching";
import { splitKey } from "./issue";

/**
Expand All @@ -18,10 +18,13 @@ export async function formatChatHistory(
specAndBodies: Record<string, string>
): Promise<string[]> {
const keys = new Set([...Object.keys(streamlined), ...Object.keys(specAndBodies), createKey(context.payload.issue.html_url)]);
let runningTokenCount = 0;
const chatHistory = await Promise.all(
Array.from(keys).map(async (key) => {
const isCurrentIssue = key === createKey(context.payload.issue.html_url);
return createContextBlockSection(context, key, streamlined, specAndBodies, isCurrentIssue);
const [currentTokenCount, result] = await createContextBlockSection(context, key, streamlined, specAndBodies, isCurrentIssue, runningTokenCount);
runningTokenCount += currentTokenCount;
return result;
})
);
return Array.from(new Set(chatHistory));
Expand Down Expand Up @@ -65,8 +68,10 @@ async function createContextBlockSection(
key: string,
streamlined: Record<string, StreamlinedComment[]>,
specAndBodies: Record<string, string>,
isCurrentIssue: boolean
) {
isCurrentIssue: boolean,
currentContextTokenCount: number = 0
): Promise<[number, string]> {
const maxTokens = context.config.maxTokens;
let comments = streamlined[key];
if (!comments || comments.length === 0) {
const [owner, repo, number] = splitKey(key);
Expand All @@ -83,7 +88,18 @@ async function createContextBlockSection(
if (!issueNumber || isNaN(issueNumber)) {
throw context.logger.error("Issue number is not valid");
}
const prDiff = await fetchPullRequestDiff(context, org, repo, issueNumber);
const pulls = (await fetchLinkedPullRequests(org, repo, issueNumber, context)) || [];
const prDiffs = await Promise.all(pulls.map((pull) => fetchPullRequestDiff(context, org, repo, pull.number)));
let prDiff: string | null = null;
for (const pullDiff of prDiffs.flat()) {
if (currentContextTokenCount > maxTokens) break;
if (pullDiff) {
const tokenLength = await context.adapters.openai.completions.findTokenLength(pullDiff.diff);
if (currentContextTokenCount + tokenLength > maxTokens) break;
currentContextTokenCount += tokenLength;
prDiff = (prDiff ? prDiff + "\n" : "") + pullDiff.diff;
}
}
const specHeader = getCorrectHeaderString(prDiff, issueNumber, isCurrentIssue, false);
let specOrBody = specAndBodies[key];
if (!specOrBody) {
Expand All @@ -98,14 +114,16 @@ async function createContextBlockSection(
)?.body || "No specification or body available";
}
const specOrBodyBlock = [createHeader(specHeader, key), createSpecOrBody(specOrBody), createFooter(specHeader)];
currentContextTokenCount += await context.adapters.openai.completions.findTokenLength(specOrBody);
const header = getCorrectHeaderString(prDiff, issueNumber, isCurrentIssue, true);
const repoString = `${org}/${repo} #${issueNumber}`;
const block = [specOrBodyBlock.join(""), createHeader(header, repoString), createComment({ issueNumber, repo, org, comments }), createFooter(header)];
currentContextTokenCount += await context.adapters.openai.completions.findTokenLength(block.join(" "));
if (!prDiff) {
return block.join("");
return [currentContextTokenCount, block.join("")];
}
const diffBlock = [createHeader("Linked Pull Request Code Diff", repoString), prDiff, createFooter("Linked Pull Request Code Diff")];
return block.concat(diffBlock).join("");
const diffBlock = [createHeader("Linked Pull Request Code Diff", repoString), prDiff, createFooter("\nLinked Pull Request Code Diff")];
return [currentContextTokenCount, block.join("") + diffBlock.join("")];
}

/**
Expand Down
85 changes: 70 additions & 15 deletions src/helpers/issue-fetching.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { GithubDiff } from "github-diff-tool";
import { createKey, getAllStreamlinedComments } from "../handlers/comments";
import { Context } from "../types";
import { IssueWithUser, SimplifiedComment, User } from "../types/github-types";
import { IssueWithUser, LinkedPullsToIssue, SimplifiedComment, User } from "../types/github-types";
import { FetchParams, Issue, Comments, LinkedIssues } from "../types/github-types";
import { StreamlinedComment } from "../types/llm";
import {
Expand Down Expand Up @@ -164,18 +165,36 @@ export async function mergeCommentsAndFetchSpec(
* @param issue - The pull request number.
* @returns A promise that resolves to the diff of the pull request as a string, or null if an error occurs.
*/
export async function fetchPullRequestDiff(context: Context, org: string, repo: string, issue: number): Promise<string | null> {
export async function fetchPullRequestDiff(context: Context, org: string, repo: string, issue: number): Promise<{ diff: string; diffSize: number }[] | null> {
const { octokit, logger } = context;
try {
const { data } = await octokit.pulls.get({
owner: org,
repo,
pull_number: issue,
mediaType: {
format: "diff",
},
});
return data as unknown as string;
const githubDiff = new GithubDiff(octokit);
//Fetch the statistics of the pull request
const stats = await githubDiff.getPullRequestStats(org, repo, issue);
const files = stats.map((file) => ({ filename: file.filename, diffSizeInBytes: file.diffSizeInBytes }));
//Fetch the diff of the files
const prDiffs = await Promise.all(
files.map(async (file) => {
let diff = null;
try {
diff = await githubDiff.getPullRequestDiff({
owner: org,
repo,
pullNumber: issue,
filePath: file.filename,
});
} catch {
logger.error(`Error fetching pull request diff for the file`, {
owner: org,
repo,
pull_number: issue,
file: file.filename,
});
}
return diff ? { diff: file.filename + diff, diffSize: file.diffSizeInBytes } : null;
})
);
return prDiffs.filter((diff): diff is { diff: string; diffSize: number } => diff !== null).sort((a, b) => a.diffSize - b.diffSize);
} catch (error) {
logger.error(`Error fetching pull request diff`, {
error: error as Error,
Expand All @@ -188,10 +207,9 @@ export async function fetchPullRequestDiff(context: Context, org: string, repo:
}

/**
* Fetches the details of a pull request.
*
* @param params - The parameters required to fetch the pull request, including context and other details.
* @returns A promise that resolves to the pull request details or null if an error occurs.
* Fetches an issue from the GitHub API.
* @param params - Context
* @returns A promise that resolves to an issue object or null if an error occurs.
*/
export async function fetchIssue(params: FetchParams): Promise<Issue | null> {
const { octokit, payload, logger } = params.context;
Expand Down Expand Up @@ -296,3 +314,40 @@ function castCommentsToSimplifiedComments(comments: Comments, params: FetchParam
url: comment.html_url,
}));
}

/**
 * Fetches the pull requests linked to an issue via GitHub's
 * `closedByPullRequestsReferences` GraphQL connection (PRs referenced as
 * closing the issue).
 *
 * NOTE(review): only the first 100 references are requested — presumably
 * sufficient for any single issue; confirm no pagination is needed.
 *
 * @param owner - Repository owner login.
 * @param repo - Repository name.
 * @param issueNumber - Number of the issue whose linked PRs are fetched.
 * @param context - Context providing the authenticated Octokit client and logger.
 * @returns The linked PR nodes (number, title, state, merged, url), or null
 *          when the GraphQL request fails (the error is logged; callers are
 *          expected to default to an empty list).
 */
export async function fetchLinkedPullRequests(owner: string, repo: string, issueNumber: number, context: Context) {
  const query = `
    query($owner: String!, $repo: String!, $issueNumber: Int!) {
      repository(owner: $owner, name: $repo) {
        issue(number: $issueNumber) {
          closedByPullRequestsReferences(first: 100) {
            nodes {
              number
              title
              state
              merged
              url
            }
          }
        }
      }
    }
  `;

  try {
    const { repository } = await context.octokit.graphql<LinkedPullsToIssue>(query, {
      owner,
      repo,
      issueNumber,
    });
    return repository.issue.closedByPullRequestsReferences.nodes;
  } catch (error) {
    // Swallow the failure after logging: a missing PR link list should not
    // abort the whole chat-context build.
    context.logger.error(`Error fetching linked PRs from issue`, {
      error: error as Error,
      owner,
      repo,
      issueNumber,
    });
    return null;
  }
}
55 changes: 0 additions & 55 deletions src/helpers/issue.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,61 +166,6 @@ export async function fetchCodeLinkedFromIssue(
}));
}

/**
 * Optimizes context strings by removing duplicates and sorting by
 * information density (unique-word ratio), with length as a tie-breaker.
 * URLs are preserved byte-for-byte through the cleaning step.
 *
 * @param strings - The array of context strings to optimize.
 * @returns The optimized array of context strings.
 */
export function optimizeContext(strings: string[]): string[] {
  // Cleans a string (strip punctuation, collapse whitespace, lowercase)
  // while keeping any http(s) links intact.
  function cleanString(inputString: string): string {
    // Swap links for placeholders so the cleaning regexes cannot mangle them.
    // The placeholder must survive the cleaning step below, so it uses only
    // lowercase letters and digits. (The previous "__LINK0__" placeholder was
    // destroyed by the underscore-stripping regex and the lowercasing, so
    // links were silently replaced instead of restored.)
    const links: string[] = [];
    inputString = inputString.replace(/https?:\/\/\S+/g, (match) => {
      links.push(match);
      return `xqlinkx${links.length - 1}x`;
    });
    // Clean the string
    inputString = inputString
      .replace(/[^\w\s-/]|_/g, "") // Remove punctuation except '-' and '/'
      .replace(/\s+/g, " ")
      .trim()
      .toLowerCase();
    // Restore links. The replacer receives (fullMatch, captureGroup, ...);
    // the index must come from the capture group, not the full match
    // (the previous code passed the full match to parseInt, yielding NaN).
    inputString = inputString.replace(/xqlinkx(\d+)x/g, (_match, index) => links[parseInt(index, 10)]);

    return inputString;
  }
  // Information density = fraction of distinct words in the string.
  function informationDensity(s: string): number {
    const words = s.split(/\s+/);
    const uniqueWords = new Set(words);
    return uniqueWords.size / words.length;
  }
  // Clean and remove empty strings
  const cleanedStrings = strings.map(cleanString).filter((s) => s.length > 0);
  // Remove exact duplicates
  const uniqueStrings = Array.from(new Set(cleanedStrings));
  // Sort strings by information density and length
  uniqueStrings.sort((a, b) => {
    const densityDiff = informationDensity(b) - informationDensity(a);
    return densityDiff !== 0 ? densityDiff : b.length - a.length;
  });
  const result: string[] = [];
  const wordSet = new Set<string>();
  for (const str of uniqueStrings) {
    const words = str.split(/\s+/);
    // Keep a string only if it contributes new words; strings containing
    // links are always kept (links are excluded from the seen-word set).
    const newWords = words.filter((word) => !wordSet.has(word) && !word.startsWith("http"));
    if (newWords.length > 0 || str.includes("http")) {
      result.push(str);
      newWords.forEach((word) => wordSet.add(word));
    }
  }
  return result;
}

/**
* Extracts and returns the README content from the repository associated with the given issue.
*
Expand Down
18 changes: 18 additions & 0 deletions src/types/github-types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,21 @@ export type FetchedCodes = {
repo: string;
issueNumber: number;
};

/**
 * A single pull request node as returned by the
 * `closedByPullRequestsReferences` GraphQL connection
 * (see fetchLinkedPullRequests).
 */
export type FetchedPulls = {
  number: number; // PR number within the repository
  title: string;
  state: string; // NOTE(review): GraphQL PullRequestState (OPEN/CLOSED/MERGED) — confirm casing with consumers
  merged: boolean;
  url: string; // HTML URL of the pull request
};

/**
 * Shape of the GraphQL response for the linked-pull-requests query:
 * repository -> issue -> closedByPullRequestsReferences -> nodes.
 */
export type LinkedPullsToIssue = {
  repository: {
    issue: {
      closedByPullRequestsReferences: {
        nodes: FetchedPulls[];
      };
    };
  };
};
Loading

0 comments on commit ec32970

Please sign in to comment.