Merge pull request ubiquity-os-marketplace#87 from gentlementlegen/fix/truncated-response

gentlementlegen · web-flow · commit 31cb5601fb0c · 2024-08-21T01:33:18.000+09:00
fix: increased max_tokens to avoid truncating responses
diff --git a/package.json b/package.json
@@ -38,10 +38,11 @@
     "decimal.js": "10.4.3",
     "dotenv": "16.4.5",
     "ethers": "^6.13.0",
+    "js-tiktoken": "1.0.14",
     "jsdom": "24.0.0",
     "lodash": "4.17.21",
     "markdown-it": "14.1.0",
-    "openai": "4.29.1",
+    "openai": "4.56.0",
     "tsx": "4.7.1",
     "typebox-validators": "0.3.5",
     "yaml": "2.4.1"
diff --git a/src/parser/content-evaluator-module.ts b/src/parser/content-evaluator-module.ts
@@ -1,17 +1,18 @@
+import { Value } from "@sinclair/typebox/value";
 import Decimal from "decimal.js";
+import { encodingForModel, Tiktoken } from "js-tiktoken";
 import OpenAI from "openai";
+import { commentEnum, CommentType } from "../configuration/comment-types";
 import configuration from "../configuration/config-reader";
 import { OPENAI_API_KEY } from "../configuration/constants";
 import {
   ContentEvaluatorConfiguration,
   contentEvaluatorConfigurationType,
 } from "../configuration/content-evaluator-config";
-import { IssueActivity } from "../issue-activity";
-import { GithubCommentScore, Module, Result } from "./processor";
-import { Value } from "@sinclair/typebox/value";
-import { commentEnum, CommentType } from "../configuration/comment-types";
 import logger from "../helpers/logger";
+import { IssueActivity } from "../issue-activity";
 import openAiRelevanceResponseSchema, { RelevancesByOpenAi } from "../types/openai-type";
+import { GithubCommentScore, Module, Result } from "./processor";
 
 /**
  * Evaluates and rates comments.
@@ -112,30 +113,47 @@ export class ContentEvaluatorModule implements Module {
     return commentsWithScore;
   }
 
+  /**
+   * Estimates the `max_tokens` budget for a completion request: counts the tokens of `prompt`
+   * and caps that count at `totalTokenLimit`.
+   *
+   * Despite the parameter name, `_evaluateComments` passes in a placeholder response (the
+   * pretty-printed output of `_generateDummyResponse`), so the count reflects the expected size
+   * of the reply rather than the size of the request.
+   *
+   * Tokens are counted with the tokenizer for "gpt-4o-2024-08-06", the same model name that
+   * `_evaluateComments` sends to the API.
+   *
+   * @param prompt - Text whose token count is measured.
+   * @param totalTokenLimit - Upper bound for the returned value; defaults to 16384
+   *   (NOTE(review): presumably the model's completion-token ceiling — confirm).
+   * @returns The token count of `prompt`, or `totalTokenLimit` when the count exceeds it.
+   */
+  _calculateMaxTokens(prompt: string, totalTokenLimit: number = 16384) {
+    const tokenizer: Tiktoken = encodingForModel("gpt-4o-2024-08-06");
+    const inputTokens = tokenizer.encode(prompt).length;
+    return Math.min(inputTokens, totalTokenLimit);
+  }
+
+  /**
+   * Builds a placeholder relevance map with one entry per comment: the key is the comment id
+   * and the value is always 0.5. `_evaluateComments` serializes this object to approximate the
+   * size of the real reply when computing `max_tokens`.
+   *
+   * @param comments - Comments to create placeholder entries for; only `id` is read.
+   * @returns An object mapping each comment id to 0.5 (an empty object for an empty list).
+   */
+  _generateDummyResponse(comments: { id: number; comment: string }[]) {
+    return comments.reduce<Record<string, number>>((acc, curr) => {
+      return { ...acc, [curr.id]: 0.5 };
+    }, {});
+  }
+
   async _evaluateComments(
     specification: string,
     comments: { id: number; comment: string }[]
   ): Promise<RelevancesByOpenAi> {
     const prompt = this._generatePrompt(specification, comments);
+    const dummyResponse = JSON.stringify(this._generateDummyResponse(comments), null, 2);
+    const maxTokens = this._calculateMaxTokens(dummyResponse);
 
     const response: OpenAI.Chat.ChatCompletion = await this._openAi.chat.completions.create({
-      model: "gpt-4o",
+      model: "gpt-4o-2024-08-06",
       response_format: { type: "json_object" },
       messages: [
         {
           role: "system",
           content: prompt,
         },
       ],
-      temperature: 1,
-      max_tokens: 128,
+      max_tokens: maxTokens,
       top_p: 1,
+      temperature: 1,
       frequency_penalty: 0,
       presence_penalty: 0,
     });
 
     const rawResponse = String(response.choices[0].message.content);
-    logger.info(`OpenAI raw response: ${rawResponse}`);
+    logger.info(`OpenAI raw response (using max_tokens: ${maxTokens}): ${rawResponse}`);
 
     const jsonResponse = JSON.parse(rawResponse);
 
diff --git a/yarn.lock b/yarn.lock
@@ -3700,10 +3700,10 @@ balanced-match@^1.0.0:
   resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee"
   integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==
 
-base-64@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/base-64/-/base-64-0.1.0.tgz#780a99c84e7d600260361511c4877613bf24f6bb"
-  integrity sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==
+base64-js@^1.5.1:
+  version "1.5.1"
+  resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a"
+  integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==
 
 bech32@1.1.4:
   version "1.1.4"
@@ -4456,14 +4456,6 @@ diff@^4.0.1:
   resolved "https://registry.yarnpkg.com/diff/-/diff-4.0.2.tgz#60f3aecb89d5fae520c11aa19efc2bb982aade7d"
   integrity sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==
 
-digest-fetch@^1.3.0:
-  version "1.3.0"
-  resolved "https://registry.yarnpkg.com/digest-fetch/-/digest-fetch-1.3.0.tgz#898e69264d00012a23cf26e8a3e40320143fc661"
-  integrity sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==
-  dependencies:
-    base-64 "^0.1.0"
-    md5 "^2.3.0"
-
 dir-glob@^3.0.1:
   version "3.0.1"
   resolved "https://registry.yarnpkg.com/dir-glob/-/dir-glob-3.0.1.tgz#56dbf73d992a4a93ba1584f4534063fd2e41717f"
@@ -6284,6 +6276,13 @@ js-sha3@0.8.0:
   resolved "https://registry.yarnpkg.com/js-sha3/-/js-sha3-0.8.0.tgz#b9b7a5da73afad7dedd0f8c463954cbde6818840"
   integrity sha512-gF1cRrHhIzNfToc802P800N8PpXS+evLLXfsVpowqmAFR9uwbi89WvXg2QspOmXL8QL86J4T1EpFu+yUkwJY3Q==
 
+js-tiktoken@1.0.14:
+  version "1.0.14"
+  resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.14.tgz#756f353262d559da16b58b5bcecfd93330076da2"
+  integrity sha512-Pk3l3WOgM9joguZY2k52+jH82RtABRgB5RdGFZNUGbOKGMVlNmafcPA3b0ITcCZPu1L9UclP1tne6aw7ZI4Myg==
+  dependencies:
+    base64-js "^1.5.1"
+
 js-tokens@^4.0.0:
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499"
@@ -7159,20 +7158,18 @@ onetime@^6.0.0:
   dependencies:
     mimic-fn "^4.0.0"
 
-openai@4.29.1:
-  version "4.29.1"
-  resolved "https://registry.yarnpkg.com/openai/-/openai-4.29.1.tgz#89d981f8ced9d1d0db2e09ca1b16b0d7775dcf36"
-  integrity sha512-vvKRIgB4/7w48PGVbeR8OceH/PT6fRo4sTIjRC7+y7WoK7by1R0cXs2SZRx4KsEh0ZB8J0eqdVIdRgs8XzeoEg==
+openai@4.56.0:
+  version "4.56.0"
+  resolved "https://registry.yarnpkg.com/openai/-/openai-4.56.0.tgz#07d3982544cabd5781127288a8dfcceb7319a4cf"
+  integrity sha512-zcag97+3bG890MNNa0DQD9dGmmTWL8unJdNkulZzWRXrl+QeD+YkBI4H58rJcwErxqGK6a0jVPZ4ReJjhDGcmw==
   dependencies:
     "@types/node" "^18.11.18"
     "@types/node-fetch" "^2.6.4"
     abort-controller "^3.0.0"
     agentkeepalive "^4.2.1"
-    digest-fetch "^1.3.0"
     form-data-encoder "1.7.2"
     formdata-node "^4.3.2"
     node-fetch "^2.6.7"
-    web-streams-polyfill "^3.2.1"
 
 optionator@^0.9.3:
   version "0.9.4"
@@ -8677,7 +8674,7 @@ web-streams-polyfill@4.0.0-beta.3:
   resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz#2898486b74f5156095e473efe989dcf185047a38"
   integrity sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==
 
-web-streams-polyfill@^3.0.3, web-streams-polyfill@^3.2.1:
+web-streams-polyfill@^3.0.3:
   version "3.3.3"
   resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz#2073b91a2fdb1fbfbd401e7de0ac9f8214cecb4b"
   integrity sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==