Changes from 12 commits
14 commits
9990a89
feat: add TTS engine configuration, update the Alibaba speech API, support real-time speech synthesis
Little-LittleProgrammer Jul 30, 2025
c5e6b12
feat: update the speech synthesis API to support streaming playback and multiple audio formats
Little-LittleProgrammer Jul 30, 2025
e836dc0
refactor: remove unnecessary TTS config and models, restore the runtime content
Little-LittleProgrammer Jul 30, 2025
221229c
refactor: restore the runtime code, remove debug console statements
Little-LittleProgrammer Jul 31, 2025
fe484fd
feat: add audio context management, optimize the PCM-to-AudioBuffer conversion
Little-LittleProgrammer Jul 31, 2025
4e3f166
feat: support function calling for Alibaba Qwen models
Little-LittleProgrammer Aug 5, 2025
044298e
feat: add web search, update related config and multi-language support
Little-LittleProgrammer Aug 5, 2025
86f2c67
feat: address CR feedback, improve audio context management, fix the PCM conversion logic, ensure the timeout is cleared on successful connection
Little-LittleProgrammer Aug 5, 2025
9cb7275
feat: update network config management, fix the network state logic on theme switching
Little-LittleProgrammer Aug 5, 2025
45eb96f
feat: restore the web search config when selecting a model
Little-LittleProgrammer Aug 5, 2025
b73e65d
refactor: revert plugins.json
Little-LittleProgrammer Aug 8, 2025
800c96c
feat: add error handling for streaming speech synthesis, improve the request timeout logic
Little-LittleProgrammer Aug 11, 2025
16c3255
fix: revert yarn to 1.22.19 to stay consistent with packageManager
Little-LittleProgrammer Aug 21, 2025
bf999b9
feat: enhance audio playback management, add a TTSPlayManager class, improve the streaming speech synthesis logic, support PCM data and base64 conversion
Little-LittleProgrammer Aug 21, 2025
1 change: 1 addition & 0 deletions .yarnrc.yml
@@ -0,0 +1 @@
nodeLinker: node-modules
3 changes: 2 additions & 1 deletion app/client/api.ts
@@ -107,7 +107,8 @@ export interface LLMModelProvider {

export abstract class LLMApi {
abstract chat(options: ChatOptions): Promise<void>;
abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
abstract speech(options: SpeechOptions): Promise<ArrayBuffer | AudioBuffer>;
abstract streamSpeech?(options: SpeechOptions): AsyncGenerator<AudioBuffer>;
abstract usage(): Promise<LLMUsage>;
abstract models(): Promise<LLMModel[]>;
}
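The new optional `streamSpeech` returns an `AsyncGenerator<AudioBuffer>`, so callers consume audio chunks with `for await` as they arrive. A minimal sketch of such a consumer, where `drainSpeech` and `playChunk` are hypothetical names, not part of the PR:

```typescript
// Sketch: drain an async generator sequentially, handling each chunk in order.
// playChunk is a placeholder (e.g. schedule the AudioBuffer on an AudioContext).
async function drainSpeech<T>(
  stream: AsyncGenerator<T>,
  playChunk: (chunk: T) => void | Promise<void>,
): Promise<number> {
  let played = 0;
  for await (const chunk of stream) {
    await playChunk(chunk);
    played++;
  }
  return played; // number of chunks consumed
}
```

Because the generator is pulled chunk by chunk, awaiting `playChunk` inside the loop naturally applies backpressure to the SSE stream.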
163 changes: 160 additions & 3 deletions app/client/platforms/alibaba.ts
@@ -6,6 +6,7 @@ import {
useChatStore,
ChatMessageTool,
usePluginStore,
FunctionToolItem,
} from "@/app/store";
import {
preProcessImageContentForAlibabaDashScope,
@@ -51,6 +52,8 @@ interface RequestParam {
repetition_penalty?: number;
top_p: number;
max_tokens?: number;
tools?: FunctionToolItem[];
enable_search?: boolean;
}
interface RequestPayload {
model: string;
@@ -59,6 +62,7 @@ interface RequestPayload {
}

export class QwenApi implements LLMApi {
private static audioContext: AudioContext | null = null;
path(path: string): string {
const accessStore = useAccessStore.getState();

@@ -89,10 +93,83 @@ export class QwenApi implements LLMApi {
return res?.output?.choices?.at(0)?.message?.content ?? "";
}

speech(options: SpeechOptions): Promise<ArrayBuffer> {
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
throw new Error("Method not implemented.");
}

async *streamSpeech(options: SpeechOptions): AsyncGenerator<AudioBuffer> {
if (!options.input || !options.model) {
throw new Error("Missing required parameters: input and model");
}
const requestPayload = {
model: options.model,
input: {
text: options.input,
voice: options.voice,
},
speed: options.speed,
response_format: options.response_format,
};
const controller = new AbortController();
options.onController?.(controller);
try {
const speechPath = this.path(Alibaba.SpeechPath);
const speechPayload = {
method: "POST",
body: JSON.stringify(requestPayload),
signal: controller.signal,
headers: {
...getHeaders(),
"X-DashScope-SSE": "enable",
},
};
Comment on lines +124 to +132

🛠️ Refactor suggestion

Harden SSE request: set headers, check res.ok/body, and clear timeout on all paths

Missing Accept/Content-Type headers, no res.ok check, and no guard for a null res.body. Also ensure the timeout is cleared in a finally block.

       const speechPayload = {
         method: "POST",
         body: JSON.stringify(requestPayload),
         signal: controller.signal,
         headers: {
           ...getHeaders(),
           "X-DashScope-SSE": "enable",
+          Accept: "text/event-stream",
+          "Content-Type": "application/json",
         },
       };
@@
-      const res = await fetch(speechPath, speechPayload);
-      clearTimeout(requestTimeoutId); // Clear timeout on successful connection
+      const res = await fetch(speechPath, speechPayload);
+      if (!res.ok) {
+        const errText = await res.text().catch(() => "");
+        throw new Error(
+          `[Alibaba TTS] HTTP ${res.status} ${res.statusText} ${errText}`,
+        );
+      }
+      if (!res.body) {
+        throw new Error("[Alibaba TTS] Missing response body for SSE stream.");
+      }

And move timeout cleanup into finally (see next comment).

Also applies to: 140-146

🤖 Prompt for AI Agents
In app/client/platforms/alibaba.ts around lines 124-132 (and similarly for lines
140-146), the SSE request is missing explicit Accept and Content-Type headers,
does not check res.ok or guard against a null res.body, and does not clear the
timeout on all code paths; update the speechPayload.headers to include "Accept":
"text/event-stream" and "Content-Type": "application/json" (or appropriate
content type), perform the fetch then check if (!res.ok) throw or handle the
error before proceeding, ensure you verify res.body exists before using it
(guard null), and move timeout cleanup into a finally block so the controller
timeout is cleared regardless of success or error.
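The "clear the timer on every path" pattern the review asks for can be factored into a small helper. This is an illustrative sketch under the review's assumptions, not code from the PR:

```typescript
// Run async work under a timeout: abort after ms, and always clear the timer
// in finally, whether the work resolves, throws, or is aborted.
async function withTimeout<T>(
  work: (signal: AbortSignal) => Promise<T>,
  ms: number,
): Promise<T> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ms);
  try {
    return await work(controller.signal);
  } finally {
    clearTimeout(timer); // runs on success, error, and abort alike
  }
}
```

With this shape, the fetch and the subsequent stream consumption both run inside `work`, so no code path can leak the timer.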


// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
getTimeoutMSByModel(options.model),
);

const res = await fetch(speechPath, speechPayload);
clearTimeout(requestTimeoutId); // Clear timeout on successful connection

const reader = res.body!.getReader();
const decoder = new TextDecoder();
let buffer = "";
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split("\n");
buffer = lines.pop() || "";

for (const line of lines) {
const data = line.slice(5);
try {
if (line.startsWith("data:")) {
const json = JSON.parse(data);
if (json.output?.audio?.data) {
yield this.PCMBase64ToAudioBuffer(json.output.audio.data);
}
}
} catch (parseError) {
console.warn(
"[StreamSpeech] Failed to parse SSE data:",
parseError,
);
continue;
}
}
}
reader.releaseLock();
} catch (e) {
console.log("[Request] failed to make a speech request", e);
throw e;
}
}
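The newline-buffering step in the loop above can be isolated as pure functions, which makes the carry-over of partial lines easy to see. An illustrative sketch (names are not from the PR):

```typescript
// Split accumulated SSE text into complete lines, carrying the trailing
// partial line over to the next chunk.
function splitSseLines(
  buffer: string,
  chunk: string,
): { lines: string[]; rest: string } {
  const lines = (buffer + chunk).split("\n");
  const rest = lines.pop() ?? "";
  return { lines, rest };
}

// A line carries a payload only if it starts with the 5-character "data:" prefix.
function extractSsePayload(line: string): string | null {
  return line.startsWith("data:") ? line.slice(5) : null;
}
```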

async chat(options: ChatOptions) {
const modelConfig = {
...useAppConfig.getState().modelConfig,
@@ -129,6 +206,7 @@ export class QwenApi implements LLMApi {
temperature: modelConfig.temperature,
// max_tokens: modelConfig.max_tokens,
top_p: modelConfig.top_p === 1 ? 0.99 : modelConfig.top_p, // qwen top_p should be < 1
enable_search: modelConfig.enableNetWork,
},
};

@@ -161,11 +239,16 @@
.getAsTools(
useChatStore.getState().currentSession().mask?.plugin || [],
);
// console.log("getAsTools", tools, funcs);
const _tools = tools as unknown as FunctionToolItem[];
if (_tools && _tools.length > 0) {
requestPayload.parameters.tools = _tools;
}
return streamWithThink(
chatPath,
requestPayload,
headers,
tools as any,
[],
funcs,
controller,
// parseSSE
@@ -198,7 +281,7 @@
});
} else {
// @ts-ignore
runTools[index]["function"]["arguments"] += args;
runTools[index]["function"]["arguments"] += args || "";
}
}

@@ -273,5 +356,79 @@
async models(): Promise<LLMModel[]> {
return [];
}

// Decode base64 PCM data and convert it to an AudioBuffer
private async PCMBase64ToAudioBuffer(base64Data: string) {
try {
// Decode the base64 payload
const binaryString = atob(base64Data);
const bytes = new Uint8Array(binaryString.length);
for (let i = 0; i < binaryString.length; i++) {
bytes[i] = binaryString.charCodeAt(i);
}

// Convert to an AudioBuffer
const audioBuffer = await this.convertToAudioBuffer(bytes);

return audioBuffer;
} catch (error) {
console.error("Failed to decode PCM data:", error);
throw error;
}
}
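The base64 decode above can be exercised on its own; a standalone sketch of the same loop (`atob` is available in browsers and in Node 16+):

```typescript
// Decode a base64 string into raw bytes, one charCode per byte.
function base64ToBytes(base64Data: string): Uint8Array {
  const binaryString = atob(base64Data);
  const bytes = new Uint8Array(binaryString.length);
  for (let i = 0; i < binaryString.length; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  return bytes;
}
```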

private static getAudioContext(): AudioContext {
if (!QwenApi.audioContext) {
QwenApi.audioContext = new (window.AudioContext ||
window.webkitAudioContext)();
}
return QwenApi.audioContext;
}

// Convert PCM byte data to an AudioBuffer
private convertToAudioBuffer(pcmData: Uint8Array) {
const audioContext = QwenApi.getAudioContext();
const channels = 1;
const sampleRate = 24000;
return new Promise<AudioBuffer>((resolve, reject) => {
try {
// Convert 16-bit PCM samples to 32-bit floats
const float32Array = this.pcm16ToFloat32(pcmData);

// Create the AudioBuffer
const audioBuffer = audioContext.createBuffer(
channels,
float32Array.length / channels,
sampleRate,
);

// Copy the samples into the AudioBuffer
for (let channel = 0; channel < channels; channel++) {
const channelData = audioBuffer.getChannelData(channel);
for (let i = 0; i < channelData.length; i++) {
channelData[i] = float32Array[i * channels + channel];
}
}

resolve(audioBuffer);
} catch (error) {
reject(error);
}
});
}
// Convert 16-bit PCM to 32-bit floats
private pcm16ToFloat32(pcmData: Uint8Array) {
const length = pcmData.length / 2;
const float32Array = new Float32Array(length);

for (let i = 0; i < length; i++) {
const int16 = (pcmData[i * 2 + 1] << 8) | pcmData[i * 2];
const int16Signed = int16 > 32767 ? int16 - 65536 : int16;
float32Array[i] = int16Signed / 32768;
}

return float32Array;
}
}
export { Alibaba };
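The little-endian 16-bit PCM decode in `pcm16ToFloat32` can be checked in isolation; a standalone sketch of the same conversion:

```typescript
// Decode little-endian signed 16-bit PCM into floats normalized to [-1, 1).
function pcm16ToFloat32(pcmData: Uint8Array): Float32Array {
  const length = pcmData.length / 2;
  const out = new Float32Array(length);
  for (let i = 0; i < length; i++) {
    // Low byte first (little-endian), then sign-extend the 16-bit value.
    const int16 = (pcmData[i * 2 + 1] << 8) | pcmData[i * 2];
    const signed = int16 > 32767 ? int16 - 65536 : int16;
    out[i] = signed / 32768;
  }
  return out;
}
```

Dividing by 32768 maps INT16_MIN exactly to -1, while INT16_MAX lands just below 1, which is the convention Web Audio expects for channel data.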