Commit d1ddc8d

llm: validate user-defined model data and better typing/bugfix
1 parent 1534e6e commit d1ddc8d

10 files changed: +404 -18 lines

src/packages/frontend/account/user-defined-llm.tsx

Lines changed: 22 additions & 0 deletions
@@ -4,6 +4,7 @@ import {
   Flex,
   Form,
   Input,
+  InputNumber,
   List,
   Modal,
   Popconfirm,
@@ -33,6 +34,7 @@ import { LanguageModelVendorAvatar } from "@cocalc/frontend/components/language-
 import { webapp_client } from "@cocalc/frontend/webapp-client";
 import { OTHER_SETTINGS_USER_DEFINED_LLM as KEY } from "@cocalc/util/db-schema/defaults";
 import {
+  FALLBACK_MAX_TOKENS,
   LLM_PROVIDER,
   SERVICES,
   UserDefinedLLM,
@@ -350,6 +352,26 @@ export function UserDefinedLLMComponent({ style, on_change }: Props) {
         >
           <Input />
         </Form.Item>
+        <Form.Item
+          label="Max Tokens"
+          name="max_tokens"
+          help={`Context window size in tokens. Leave empty to use default (${FALLBACK_MAX_TOKENS}). Valid range: 1000-2000000.`}
+          rules={[
+            {
+              type: "number",
+              min: 1000,
+              max: 2000000,
+              message: "Must be between 1000 and 2000000",
+            },
+          ]}
+        >
+          <InputNumber
+            min={1000}
+            max={2000000}
+            placeholder={`${FALLBACK_MAX_TOKENS} (default)`}
+            style={{ width: "100%" }}
+          />
+        </Form.Item>
       </Form>
     </Modal>
   );
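The new Max Tokens field validates the user-supplied context window (1000-2000000; empty falls back to FALLBACK_MAX_TOKENS). Below is a minimal sketch of the same bounds check applied programmatically, e.g. before persisting a user-defined model; the helper name normalizeMaxTokens and its fallback argument are illustrative, not part of this commit:

// Hypothetical helper mirroring the Form.Item rules above (illustration only).
const MIN_MAX_TOKENS = 1000;
const MAX_MAX_TOKENS = 2000000;

function normalizeMaxTokens(value: unknown, fallback: number): number {
  // Empty input falls back to the default, matching the form's "leave empty" behavior.
  if (value == null || value === "") {
    return fallback;
  }
  const n = typeof value === "number" ? value : Number(value);
  if (!Number.isFinite(n) || n < MIN_MAX_TOKENS || n > MAX_MAX_TOKENS) {
    throw new Error(`max_tokens must be between ${MIN_MAX_TOKENS} and ${MAX_MAX_TOKENS}`);
  }
  return Math.floor(n);
}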

src/packages/frontend/misc/llm.ts

Lines changed: 11 additions & 2 deletions
@@ -9,7 +9,11 @@ import { estimateTokenCount, sliceByTokens } from "tokenx";

 import type { History } from "@cocalc/frontend/client/types";
 import type { LanguageModel } from "@cocalc/util/db-schema/llm-utils";
-import { getMaxTokens } from "@cocalc/util/db-schema/llm-utils";
+import {
+  getMaxTokens,
+  isUserDefinedModel,
+} from "@cocalc/util/db-schema/llm-utils";
+import { getUserDefinedLLMByModel } from "@cocalc/frontend/frame-editors/llm/use-userdefined-llm";

 import { timed } from "./timing";

@@ -95,7 +99,12 @@ const truncateHistoryImpl = (
   if (maxTokens <= 0) {
     return [];
   }
-  const modelMaxTokens = getMaxTokens(model);
+  // Try to get user-defined config if this is a user model
+  const userConfig = isUserDefinedModel(model)
+    ? getUserDefinedLLMByModel(model)
+    : null;
+
+  const modelMaxTokens = getMaxTokens(model, userConfig ?? undefined);
   const maxLength = modelMaxTokens * APPROX_CHARACTERS_PER_TOKEN;
   for (let i = 0; i < history.length; i++) {
     // Performance: ensure all entries in history are reasonably short, so they don't
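History truncation now resolves the context window from the user-defined config when the model is user-defined. A minimal sketch of the assumed behavior of the two-argument getMaxTokens call follows; the real implementation lives in @cocalc/util/db-schema/llm-utils, and the config shape and fallback value shown here are assumptions for illustration:

// Sketch only: prefer a user-configured max_tokens, otherwise fall back.
interface UserLLMConfigSketch {
  max_tokens?: number;
}

const FALLBACK_MAX_TOKENS_SKETCH = 8000; // placeholder value for illustration

function getMaxTokensSketch(model: string, config?: UserLLMConfigSketch): number {
  if (config?.max_tokens != null && config.max_tokens > 0) {
    return config.max_tokens; // user-defined context window wins
  }
  // otherwise: look up the per-model default for `model` (lookup omitted) or fall back
  return FALLBACK_MAX_TOKENS_SKETCH;
}
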
Lines changed: 9 additions & 0 deletions
@@ -1,2 +1,11 @@
 require("@testing-library/jest-dom");
 process.env.COCALC_TEST_MODE = true;
+
+// Polyfill TextEncoder and TextDecoder for Jest/jsdom environment
+// These are needed by @msgpack/msgpack and other libraries
+const { TextEncoder, TextDecoder } = require("util");
+global.TextEncoder = TextEncoder;
+global.TextDecoder = TextDecoder;
+
+// Define DEBUG global (normally provided by rspack in production)
+global.DEBUG = false;

src/packages/server/llm/evaluate-lc.ts

Lines changed: 5 additions & 4 deletions
@@ -11,12 +11,14 @@ import { ServerSettings } from "@cocalc/database/settings/server-settings";
 import {
   ANTHROPIC_VERSION,
   AnthropicModel,
+  FALLBACK_MAX_TOKENS,
   fromCustomOpenAIModel,
   GOOGLE_MODEL_TO_ID,
   GoogleModel,
   isAnthropicModel,
   isCustomOpenAI,
   isGoogleModel,
+  isGoogleThinkingModel,
   isMistralModel,
   isOpenAIModel,
   isXaiModel,
@@ -172,10 +174,9 @@ export const PROVIDER_CONFIGS = {
     return new ChatGoogleGenerativeAI({
       model: modelName,
       apiKey,
-      maxOutputTokens: options.maxTokens,
-      // Only enable thinking tokens for Gemini 2.5 models
-      ...(modelName === "gemini-2.5-flash" || modelName === "gemini-2.5-pro"
-        ? { maxReasoningTokens: 1024 }
+      // Enable thinking tokens for Gemini 2.5+ models
+      ...(isGoogleThinkingModel(modelName)
+        ? { maxReasoningTokens: FALLBACK_MAX_TOKENS }
         : {}),
       streaming: options.stream != null,
     });
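The hard-coded Gemini 2.5 name check is replaced by the isGoogleThinkingModel predicate, and the reasoning-token budget now uses FALLBACK_MAX_TOKENS instead of a fixed 1024. A rough sketch of what such a predicate could look like; this is an assumption, the real helper lives in @cocalc/util/db-schema/llm-utils and may differ:

// Assumed sketch: treat Gemini 2.5 and newer as "thinking" models.
function isGoogleThinkingModelSketch(model: string): boolean {
  const match = model.match(/^gemini-(\d+)\.(\d+)/);
  if (match == null) {
    return false;
  }
  const major = Number(match[1]);
  const minor = Number(match[2]);
  return major > 2 || (major === 2 && minor >= 5);
}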

src/packages/server/llm/ollama.ts

Lines changed: 20 additions & 4 deletions
@@ -1,4 +1,4 @@
-import type { Ollama } from "@langchain/ollama";
+import { Ollama } from "@langchain/ollama";
 import {
   ChatPromptTemplate,
   MessagesPlaceholder,
@@ -22,27 +22,43 @@ interface OllamaOpts {
   model: string; // this must be ollama-[model]
   stream?: Stream;
   maxTokens?: number;
+  endpoint?: string; // optional endpoint for user-defined models
 }

 export async function evaluateOllama(
   opts: Readonly<OllamaOpts>,
   client?: Ollama,
 ): Promise<ChatOutput> {
-  if (client == null && !isOllamaLLM(opts.model)) {
+  if (client == null && !isOllamaLLM(opts.model) && !opts.endpoint) {
     throw new Error(`model ${opts.model} not supported`);
   }
   const model = fromOllamaModel(opts.model);
-  const { system, history, input, maxTokens, stream } = opts;
+  const { system, history, input, maxTokens, stream, endpoint } = opts;
   log.debug("evaluateOllama", {
     input,
     history,
     system,
     model,
     stream: stream != null,
     maxTokens,
+    endpoint,
   });

-  const ollama = client ?? (await getOllama(model));
+  // Create Ollama client: use provided client, or create from endpoint, or get from server settings
+  let ollama: Ollama;
+  if (client != null) {
+    ollama = client;
+  } else if (endpoint != null) {
+    // User-defined Ollama model with custom endpoint
+    ollama = new Ollama({
+      baseUrl: endpoint,
+      model,
+      keepAlive: "24h",
+    });
+  } else {
+    // Platform Ollama model from server settings
+    ollama = await getOllama(model);
+  }

   const historyMessagesKey = "history";

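A usage sketch of the new endpoint option; the URL, prompt, and import path are illustrative and assume a caller inside the same package:

// Sketch: evaluate a user-defined Ollama model against a custom endpoint.
import { evaluateOllama } from "./ollama";

async function example(): Promise<void> {
  const output = await evaluateOllama({
    input: "Summarize the release notes.",
    model: "ollama-llama3", // "ollama-" prefix, as produced by toOllamaModel in user-defined.ts
    endpoint: "http://localhost:11434", // bypasses getOllama / server settings
    maxTokens: 32000,
  });
  console.log(output);
}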
src/packages/server/llm/test/mock2.test.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,75 @@ describe("evaluateWithLangChain (LangChain mocked)", () => {
313313
});
314314
});
315315

316+
test("user-defined Ollama with custom max_tokens", async () => {
317+
const ollamaConfig = [
318+
{
319+
id: 1,
320+
service: "ollama",
321+
model: "llama3",
322+
display: "User Llama3",
323+
endpoint: "http://localhost:11434",
324+
apiKey: "",
325+
max_tokens: 32000,
326+
},
327+
];
328+
329+
mockCallback2.mockResolvedValueOnce({
330+
other_settings: {
331+
[OTHER_SETTINGS_USER_DEFINED_LLM]: JSON.stringify(ollamaConfig),
332+
},
333+
});
334+
335+
await evaluateUserDefinedLLM(
336+
{
337+
input: "hello",
338+
model: "user-ollama-llama3",
339+
},
340+
userAccountId,
341+
);
342+
343+
expect(mockOllama).toHaveBeenCalledWith({
344+
baseUrl: "http://localhost:11434",
345+
model: "llama3",
346+
keepAlive: "24h",
347+
});
348+
});
349+
350+
test("user-defined Google with custom max_tokens", async () => {
351+
const googleConfig = [
352+
{
353+
id: 1,
354+
service: "google",
355+
model: "gemini-2.5-flash",
356+
display: "User Gemini Flash",
357+
endpoint: "",
358+
apiKey: "user-google-key",
359+
max_tokens: 128000,
360+
},
361+
];
362+
363+
mockCallback2.mockResolvedValueOnce({
364+
other_settings: {
365+
[OTHER_SETTINGS_USER_DEFINED_LLM]: JSON.stringify(googleConfig),
366+
},
367+
});
368+
369+
await evaluateUserDefinedLLM(
370+
{
371+
input: "hi",
372+
model: "user-google-gemini-2.5-flash",
373+
},
374+
userAccountId,
375+
);
376+
377+
expect(mockChatGoogle).toHaveBeenCalledWith(
378+
expect.objectContaining({
379+
apiKey: "user-google-key",
380+
model: "gemini-2.5-flash",
381+
}),
382+
);
383+
});
384+
316385
test("ollama streams with configured model", async () => {
317386
streamChunks = ["hi", " there"];
318387
const stream = jest.fn();
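These assertions rely on module mocks for the LangChain clients that are set up earlier in mock2.test.ts. A sketch of the kind of wiring assumed here (the instance stubs are simplified; the real mocks also provide streaming behavior):

// Assumed mock wiring: replace the constructors so tests can assert on their options.
jest.mock("@langchain/ollama", () => ({
  Ollama: jest.fn().mockImplementation(() => ({})),
}));
jest.mock("@langchain/google-genai", () => ({
  ChatGoogleGenerativeAI: jest.fn().mockImplementation(() => ({})),
}));

import { Ollama } from "@langchain/ollama";
import { ChatGoogleGenerativeAI } from "@langchain/google-genai";

const mockOllama = Ollama as unknown as jest.Mock;
const mockChatGoogle = ChatGoogleGenerativeAI as unknown as jest.Mock;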

src/packages/server/llm/test/models.test.ts

Lines changed: 16 additions & 0 deletions
@@ -260,4 +260,20 @@ test_llm("user")("User-defined LLMs", () => {
     },
     LLM_TIMEOUT,
   );
+
+  // Test user-defined model with custom max_tokens
+  test_llm_case("google")(
+    "user-defined model with custom max_tokens (requires COCALC_TEST_GOOGLE_GENAI_KEY)",
+    async () => {
+      await testUserDefinedLLM({
+        service: "google",
+        display: "Test Gemini Flash with custom max_tokens",
+        endpoint: "",
+        model: "gemini-2.5-flash",
+        apiKey: process.env.COCALC_TEST_GOOGLE_GENAI_KEY!,
+        max_tokens: 128000, // Custom large context window
+      });
+    },
+    LLM_TIMEOUT,
+  );
 });

src/packages/server/llm/user-defined.ts

Lines changed: 64 additions & 3 deletions
@@ -17,6 +17,14 @@ import { evaluateWithLangChain } from "./evaluate-lc";

 const log = getLogger("llm:userdefined");

+const REDACTED_VALUE = "[redacted]";
+const SENSITIVE_KEYS = new Set([
+  "apiKey",
+  "openAIApiKey",
+  "azureOpenAIApiKey",
+  "api_key",
+]);
+
 interface UserDefinedOpts {
   input: string; // new input that user types
   system?: string; // extra setup that we add for relevance and context
@@ -30,7 +38,7 @@ export async function evaluateUserDefinedLLM(
   opts: Readonly<UserDefinedOpts>,
   account_id?: string,
 ) {
-  log.debug(`evaluateUserDefinedLLM[${account_id}]`, opts);
+  log.debug(`evaluateUserDefinedLLM[${account_id}]`, redactSensitive(opts));

   const { user_defined_llm } = await getServerSettings();
   if (!user_defined_llm) {
@@ -48,7 +56,7 @@ export async function evaluateUserDefinedLLM(
   }

   const conf = await getConfig(account_id, um.service, um.model);
-  log.debug("conf", conf);
+  log.debug("conf", redactSensitive(conf));
   if (conf == null) {
     throw new Error(`Unable to retrieve user defined model ${model}`);
   }
@@ -61,6 +69,8 @@ export async function evaluateUserDefinedLLM(
       return await evaluateOllama({
         ...opts,
         model: toOllamaModel(conf.model),
+        endpoint,
+        maxTokens: conf.max_tokens,
       });
     }
     case "openai":
@@ -76,6 +86,7 @@ export async function evaluateUserDefinedLLM(
           apiKey,
           endpoint: endpoint || undefined, // don't pass along empty strings!
           service,
+          maxTokens: conf.max_tokens, // Use max_tokens from config
         },
         "user",
       );
@@ -106,8 +117,58 @@ async function getConfig(
       }
     }
   } catch (err) {
-    log.error("Failed to parse user defined llm", user_llm_json, err);
+    log.error(
+      "Failed to parse user defined llm",
+      redactUserLLMJson(user_llm_json),
+      err,
+    );
     throw err;
   }
   return null;
 }
+
+function redactSensitive(value: any): any {
+  if (value == null) {
+    return value;
+  }
+  if (typeof value === "function") {
+    return value;
+  }
+  if (typeof value !== "object") {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => redactSensitive(item));
+  }
+  if (value instanceof Date) {
+    return value;
+  }
+  const output: Record<string, any> = {};
+  for (const [key, val] of Object.entries(value)) {
+    output[key] = SENSITIVE_KEYS.has(key)
+      ? REDACTED_VALUE
+      : redactSensitive(val);
+  }
+  return output;
+}
+
+function redactUserLLMJson(value: unknown): unknown {
+  if (typeof value !== "string") {
+    return value;
+  }
+  try {
+    const parsed = JSON.parse(value);
+    return JSON.stringify(redactSensitive(parsed));
+  } catch (_err) {
+    return redactSensitiveString(value);
+  }
+}
+
+function redactSensitiveString(value: string): string {
+  let redacted = value;
+  for (const key of SENSITIVE_KEYS) {
+    const regex = new RegExp(`("${key}"\\s*:\\s*")([^"]*)(")`, "g");
+    redacted = redacted.replace(regex, `$1${REDACTED_VALUE}$3`);
+  }
+  return redacted;
+}
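A usage example of the redaction helpers added above (the config object and values are illustrative):

// redactSensitive walks objects/arrays recursively and replaces any key listed
// in SENSITIVE_KEYS with "[redacted]"; all other values pass through unchanged.
const conf = {
  service: "google",
  model: "gemini-2.5-flash",
  apiKey: "sk-secret",
  nested: [{ api_key: "also-secret", max_tokens: 128000 }],
};

console.log(redactSensitive(conf));
// -> { service: "google", model: "gemini-2.5-flash", apiKey: "[redacted]",
//      nested: [ { api_key: "[redacted]", max_tokens: 128000 } ] }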
