v1.2.0 update: limit send token count.

Aincvy · Sep 22, 2023 · e8c9202 · e8c9202
1 parent 8f42916
commit e8c9202
Show file tree

Hide file tree

Showing 5 changed files with 115 additions and 30 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -2,7 +2,7 @@
   "name": "fauxpilot",
   "displayName": "Fauxpilot",
   "description": "Get completions from Fauxpilot server",
-  "version": "1.1.9",
+  "version": "1.2.0",
   "icon": "assets/icon.png",
   "keywords": [
     "code-suggestion",
@@ -109,6 +109,16 @@
             "openai",
             "axios"
           ]
+        },
+        "fauxpilot.serverMaxTokens": {
+          "type": "integer",
+          "default": 1525,
+          "description": "The maximum number of tokens allowed in a single request to the target server. The count is computed locally with `@dqbd/tiktoken` and may differ from the server's own calculation. Before sending, the prompt is trimmed until its token count is below this value. The default of 1525 is intended for a server with a 2048-token limit."
+        },
+        "fauxpilot.reduceLineStep": {
+          "type": "integer",
+          "default": 1,
+          "description": "The number of lines to reduce in each iteration when adjusting the token count."
         }
       }
     }
@@ -144,6 +154,7 @@
     "webpack-cli": "^4.10.0"
   },
   "dependencies": {
+    "@dqbd/tiktoken": "^1.0.7",
     "axios": "^1.5.0",
     "http": "^0.0.1-security",
     "openai": "^4.3.1"

diff --git a/src/FauxpilotClient.ts b/src/FauxpilotClient.ts
@@ -22,11 +22,14 @@ export class FauxpilotClient {
     private requestType = RequestType.OpenAI;
     private maxLines: number;
     private responseStatus: FetchResponseStatus;
+    private serverMaxTokens: number;
+    private leadingLinesRatio: number;
+    private reduceLineStep: number;
 
     public version: string;
+
 
     constructor() {
-        // this.outputChannel = null;
         this.excludeFileExts = [];
         this.baseUrl = '';
         this.model = '<<UNSET>>';
@@ -36,7 +39,10 @@ export class FauxpilotClient {
         this.version = '';
         this.token = '';
         this.maxLines = 150;
-        this.responseStatus = new FetchResponseStatus(200,'');
+        this.responseStatus = new FetchResponseStatus(200, '');
+        this.serverMaxTokens = 2048;
+        this.leadingLinesRatio = 0.185;
+        this.reduceLineStep = 1;
     }
 
     public init(extConfig: WorkspaceConfiguration, channel: OutputChannel) {
@@ -68,6 +74,8 @@ export class FauxpilotClient {
         this.token = extConfig.get("token", '');
         this.requestType = extConfig.get("requestType", 'openai') === 'openai' ? RequestType.OpenAI : RequestType.Aixos;
         this.maxLines = extConfig.get("maxLines", 150);
+        this.serverMaxTokens = extConfig.get("serverMaxTokens", 2048);
+        this.reduceLineStep = extConfig.get("reduceLineStep", 1);
 
         this.log(`enabled = ${this.enabled}`);
         this.log(`baseUrl = ${this.baseUrl}`);
@@ -80,6 +88,8 @@ export class FauxpilotClient {
         this.log(`token = ${this.token}`);
         this.log(`requestType = ${this.requestType}`);
         this.log(`maxLines = ${this.maxLines}`);
+        this.log(`serverMaxTokens = ${this.serverMaxTokens}`);
+        this.log(`reduceLineStep = ${this.reduceLineStep}`);
 
         rebuildAccessBackendCache();
         this.log("reload config finish.");
@@ -155,6 +165,19 @@ export class FauxpilotClient {
         this.responseStatus = value;
     }
 
+    /** Token budget for a single request, as stored in `serverMaxTokens`. */
+    public get ServerMaxTokens(): number {
+        return this.serverMaxTokens;
+    }
+
+    /** Current value of `leadingLinesRatio` (the constructor initialises it to 0.185). */
+    public get LeadingLinesRatio(): number {
+        return this.leadingLinesRatio;
+    }
+
+    /** Line-reduction step, as stored in `reduceLineStep`. */
+    public get ReduceLineStep(): number {
+        return this.reduceLineStep;
+    }
+
+
 }
 
 const client = new FauxpilotClient();

diff --git a/src/FauxpilotCompletionProvider.ts b/src/FauxpilotCompletionProvider.ts
@@ -5,7 +5,7 @@ import {
     TextDocument, workspace, StatusBarItem, OutputChannel, WorkspaceConfiguration, InlineCompletionTriggerKind
 } from 'vscode';
 
-import { nextId,delay } from './Utils';
+import { nextId,delay, limitTextLength } from './Utils';
 import { LEADING_LINES_PROP } from './Constants';
 import { fauxpilotClient } from './FauxpilotClient';
 import { fetch } from './AccessBackend';
@@ -55,7 +55,7 @@ export class FauxpilotCompletionProvider implements InlineCompletionItemProvider
                 return;
             }
 
-            const prompt = this.getPrompt(document, position);
+            const prompt = limitTextLength(document, position);
             let suggestionDelay = fauxpilotClient.SuggestionDelay;
             if (suggestionDelay > 0) {
                 let holdPressId = ++this.userPressKeyCount;
@@ -150,29 +150,7 @@ export class FauxpilotCompletionProvider implements InlineCompletionItemProvider
         }
     }
 
-    private getPrompt(document: TextDocument, position: Position): string {
-        const promptLinesCount = fauxpilotClient.MaxLines;
-
-        /* 
-        Put entire file in prompt if it's small enough, otherwise only
-        take lines above the cursor and from the beginning of the file.
-        */
-
-        // Only determine the content before the cursor
-        const currentLine = position.line;                 //  document.lineCount
-        if (currentLine <= promptLinesCount) {
-            const range = new Range(0, 0, position.line, position.character);
-            return document.getText(range);
-        } else {
-            const leadingLinesCount = Math.floor(LEADING_LINES_PROP * promptLinesCount);
-            const prefixLinesCount = promptLinesCount - leadingLinesCount;
-            const firstPrefixLine = Math.max(position.line - prefixLinesCount, 0);
-
-            const leading = document.getText(new Range(0, 0, leadingLinesCount, 200));
-            const prefix = document.getText(new Range(firstPrefixLine, 0, position.line, position.character));
-            return `${leading}\n${prefix}`;
-        }
-    }
+
 
     private isNil(value: String | undefined | null): boolean {
         return value === undefined || value === null || value.length === 0;

diff --git a/src/Utils.ts b/src/Utils.ts
@@ -1,3 +1,7 @@
+import { encoding_for_model } from "@dqbd/tiktoken";
+import { TextDocument, Position, Range } from "vscode";
+import { fauxpilotClient } from "./FauxpilotClient";
+
 let poorManUuid = 0;
 
 // Placeholder for a real Unique ID function
@@ -20,3 +24,61 @@ export function currentTimeString(): string {
     return `[${hours}:${minutes}:${seconds}:${ms}]`;
 }
 
+/**
+ * Counts the tokens in `message` using the `gpt-3.5-turbo` encoding from
+ * `@dqbd/tiktoken`.
+ *
+ * @param message Text to tokenize.
+ * @returns Number of tokens the encoder produced for `message`.
+ */
+function numTokensFromString(message: string): number {
+    const encoder = encoding_for_model("gpt-3.5-turbo");
+    try {
+        return encoder.encode(message).length;
+    } finally {
+        // Always release the encoder, including when encode() throws.
+        encoder.free();
+    }
+}
+
+/**
+ * Builds the prompt text that precedes the cursor and shrinks it until its
+ * locally computed token count is below `fauxpilotClient.ServerMaxTokens`.
+ *
+ * Starting from `fauxpilotClient.MaxLines` lines, each iteration removes
+ * `fauxpilotClient.ReduceLineStep` lines from the line budget and lowers the
+ * share of lines taken from the top of the file (never below 0.105).
+ *
+ * @param doc Document the prompt is extracted from.
+ * @param pos Cursor position; only text before it is used.
+ * @returns The prompt text, or `''` when the prompt is empty or the line
+ *          budget is exhausted before the text fits.
+ */
+export function limitTextLength(doc: TextDocument, pos: Position): string {
+    // Lower bound for the share of the line budget taken from the top of the file.
+    const minHeadRatio = 0.105;
+
+    // A step of 0, a negative step or a non-numeric value would never shrink the
+    // prompt, so the loop below would never terminate. Use at least one line.
+    const configuredStep = fauxpilotClient.ReduceLineStep;
+    const step = Number.isFinite(configuredStep) ? Math.max(1, Math.floor(configuredStep)) : 1;
+
+    let headRatio = fauxpilotClient.LeadingLinesRatio;
+    let promptLinesCount = fauxpilotClient.MaxLines;
+    const ratioReduce = step / promptLinesCount;
+
+    while (true) {
+        const str = getPrompt(doc, pos, headRatio, promptLinesCount);
+        if (!str) {
+            return '';
+        }
+
+        const tokenCount = numTokensFromString(str);
+        if (tokenCount < fauxpilotClient.ServerMaxTokens) {
+            fauxpilotClient.log(`send token count: ${tokenCount}`);
+            return str;
+        }
+
+        // Still too large: drop `step` lines from the budget and give up once none are left.
+        promptLinesCount -= step;
+        if (promptLinesCount <= 0) {
+            return '';
+        }
+
+        headRatio = Math.max(minHeadRatio, headRatio - ratioReduce);
+
+        fauxpilotClient.log(`reach max token count, current token count: ${tokenCount}, promptLinesCount: ${promptLinesCount}, headRatio: ${headRatio}`);
+    }
+}
+
+/**
+ * Extracts the text before the cursor, limited by a line budget.
+ *
+ * When the cursor line is within `promptLinesCount`, everything from the start
+ * of the document up to the cursor is returned. Otherwise the result is a head
+ * section from the top of the file joined by a newline to the lines directly
+ * above the cursor.
+ *
+ * @param document Document to read from.
+ * @param position Cursor position; text after it is never included.
+ * @param headRatio Fraction of `promptLinesCount` used to size the head section.
+ * @param promptLinesCount Line budget for the prompt.
+ * @returns The assembled prompt text.
+ */
+function getPrompt(document: TextDocument, position: Position, headRatio: number, promptLinesCount: number): string {
+    const cursorLine = position.line;
+
+    // The cursor is inside the line budget: take everything before it.
+    if (cursorLine <= promptLinesCount) {
+        return document.getText(new Range(0, 0, cursorLine, position.character));
+    }
+
+    const headLines = Math.floor(headRatio * promptLinesCount);
+    const tailLines = promptLinesCount - headLines;
+    const tailStartLine = Math.max(cursorLine - tailLines, 0);
+
+    // The head range ends at line index `headLines`, column 200.
+    const head = document.getText(new Range(0, 0, headLines, 200));
+    const tail = document.getText(new Range(tailStartLine, 0, cursorLine, position.character));
+    return [head, tail].join('\n');
+}