/*
 * Origin: commit 9075b90008dc1354e21189ffdbf80713653214a2
 * Subject: feat: dynamic ground truths
 * File: src/handlers/find-ground-truths.ts
 */
import OpenAI from "openai";
import { Context } from "../types";
import { logger } from "../helpers/errors";

/** Upper bound on the number of ground truths; mirrors the limit stated in the system prompt below. */
const MAX_GROUND_TRUTHS = 10;

/** System prompt instructing the model to reply with a JSON array of at most 10 ground-truth strings. */
const FIND_GROUND_TRUTHS_SYSTEM_MESSAGE = `Using the input provided, your goal is to produce an array of strings that represent "Ground Truths."
    These ground truths are high-level abstractions that encapsulate the key aspects of the task.
    They serve to guide and inform our code review model's interpretation of the task by providing clear, concise, and explicit insights.

    Each ground truth should:
    - Be succinct and easy to understand.
    - Directly pertain to the task at hand.
    - Focus on essential requirements, behaviors, or assumptions involved in the task.

    Example:
    Task: Implement a function that adds two numbers.
    Ground Truths:
    - The function should accept two numerical inputs.
    - The function should return the sum of the two inputs.
    - Inputs must be validated to ensure they are numbers.

    Based on the given task, generate similar ground truths adhering to a maximum of 10.

    Return a JSON parsable array of strings representing the ground truths, without comment or directive.`;

/**
 * Removes a surrounding Markdown code fence (``` or ```json) from a model reply.
 *
 * Replies that are not fenced are returned trimmed and otherwise untouched, so
 * any input that previously parsed still parses to the same value.
 *
 * @param raw - Raw text returned by the model.
 * @returns The fenced payload if a fence wraps the whole reply, else the trimmed input.
 */
function stripCodeFence(raw: string): string {
  const trimmed = raw.trim();
  const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
  return fenced?.[1] ?? trimmed;
}

/**
 * Parses and validates the model's reply as a list of ground truths.
 *
 * @param truthsString - Text expected to contain a JSON array of strings.
 * @returns The parsed array of ground-truth strings.
 * @throws The value returned by `logger.error` when the text is not valid JSON,
 *   is not an array, holds more than 10 entries, or contains a non-string entry.
 */
function validateGroundTruths(truthsString: string): string[] {
  // `unknown` forces every shape check below to happen before the value is used.
  let truths: unknown;
  try {
    truths = JSON.parse(stripCodeFence(truthsString));
  } catch {
    throw logger.error("Failed to parse ground truths");
  }

  if (!Array.isArray(truths)) {
    throw logger.error("Ground truths must be an array");
  }

  if (truths.length > MAX_GROUND_TRUTHS) {
    throw logger.error("Ground truths must not exceed 10");
  }

  // The type predicate lets the compiler see the element check instead of trusting an annotation.
  if (!truths.every((truth: unknown): truth is string => typeof truth === "string")) {
    throw logger.error("Each ground truth must be a string");
  }

  return truths;
}

/**
 * Asks the configured chat model to derive ground truths from the given source text.
 *
 * Builds an OpenAI client from `context.env.OPENAI_API_KEY` (and
 * `context.config.openAiBaseUrl` when set), sends the system prompt plus
 * `groundTruthSource` as the user message, and validates the first choice.
 *
 * @param context - Provides `env.OPENAI_API_KEY` and `config.openAiBaseUrl` / `config.model`.
 * @param groundTruthSource - Text sent as the user message from which ground truths are derived.
 * @returns The validated array of ground-truth strings.
 * @throws The value returned by `logger.error` when the response has no content or fails validation.
 */
export async function findGroundTruths(context: Context, groundTruthSource: string): Promise<string[]> {
  const {
    env: { OPENAI_API_KEY },
    config: { openAiBaseUrl, model },
  } = context;

  const openAi = new OpenAI({
    apiKey: OPENAI_API_KEY,
    // Only override the SDK's default endpoint when a base URL is configured.
    ...(openAiBaseUrl && { baseURL: openAiBaseUrl }),
  });

  const res = await openAi.chat.completions.create({
    messages: [
      {
        role: "system",
        content: FIND_GROUND_TRUTHS_SYSTEM_MESSAGE,
      },
      {
        role: "user",
        content: groundTruthSource,
      },
    ],
    /**
     * I've used the config model here but in my opinion,
     * we should optimize this for a quicker response which
     * means no advanced reasoning models. rfc
     */
    model,
  });

  // Optional chaining routes an empty or missing `choices` list to the logged error
  // below instead of an unhandled TypeError.
  const output = res.choices?.[0]?.message?.content;

  if (!output) {
    throw logger.error("Failed to produce a ground truths response");
  }

  return validateGroundTruths(output);
}