Feature/ support (alibaba tts + alibaba function calling + network search) #6588
base: main
Changes from 12 commits
9990a89
c5e6b12
e836dc0
221229c
fe484fd
4e3f166
044298e
86f2c67
9cb7275
45eb96f
b73e65d
800c96c
16c3255
bf999b9
@@ -0,0 +1 @@
+nodeLinker: node-modules
@@ -6,6 +6,7 @@ import {
   useChatStore,
   ChatMessageTool,
   usePluginStore,
+  FunctionToolItem,
 } from "@/app/store";
 import {
   preProcessImageContentForAlibabaDashScope,
@@ -51,6 +52,8 @@ interface RequestParam {
   repetition_penalty?: number;
   top_p: number;
   max_tokens?: number;
+  tools?: FunctionToolItem[];
+  enable_search?: boolean;
 }
 interface RequestPayload {
   model: string;
@@ -59,6 +62,7 @@ interface RequestPayload {
 }

 export class QwenApi implements LLMApi {
+  private static audioContext: AudioContext | null = null;
   path(path: string): string {
     const accessStore = useAccessStore.getState();
@@ -89,10 +93,83 @@ export class QwenApi implements LLMApi {
     return res?.output?.choices?.at(0)?.message?.content ?? "";
   }

-  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
     throw new Error("Method not implemented.");
   }

+  async *streamSpeech(options: SpeechOptions): AsyncGenerator<AudioBuffer> {
+    if (!options.input || !options.model) {
+      throw new Error("Missing required parameters: input and model");
+    }
+    const requestPayload = {
+      model: options.model,
+      input: {
+        text: options.input,
+        voice: options.voice,
+      },
+      speed: options.speed,
+      response_format: options.response_format,
+    };
+    const controller = new AbortController();
+    options.onController?.(controller);
+    try {
+      const speechPath = this.path(Alibaba.SpeechPath);
+      const speechPayload = {
+        method: "POST",
+        body: JSON.stringify(requestPayload),
+        signal: controller.signal,
+        headers: {
+          ...getHeaders(),
+          "X-DashScope-SSE": "enable",
+        },
+      };
Review comment on lines +124 to +132:

🛠️ Refactor suggestion — Harden the SSE request: set headers, check res.ok and res.body, and clear the timeout on all paths. The request is missing Accept and Content-Type headers, there is no res.ok check, and there is no guard for a null res.body. The timeout should also be cleared in a finally block.

      const speechPayload = {
        method: "POST",
        body: JSON.stringify(requestPayload),
        signal: controller.signal,
        headers: {
          ...getHeaders(),
          "X-DashScope-SSE": "enable",
+         Accept: "text/event-stream",
+         "Content-Type": "application/json",
        },
      };
@@
-     const res = await fetch(speechPath, speechPayload);
-     clearTimeout(requestTimeoutId); // Clear timeout on successful connection
+     const res = await fetch(speechPath, speechPayload);
+     if (!res.ok) {
+       const errText = await res.text().catch(() => "");
+       throw new Error(
+         `[Alibaba TTS] HTTP ${res.status} ${res.statusText} ${errText}`,
+       );
+     }
+     if (!res.body) {
+       throw new Error("[Alibaba TTS] Missing response body for SSE stream.");
+     }

And move the timeout cleanup into finally (see next comment). Also applies to lines 140-146.
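Following up on that suggestion, a minimal sketch of the finally-based cleanup the reviewer asks for (requestTimeoutId, controller, speechPath, and speechPayload are the PR's names; the exact placement is illustrative, not the PR's final code):

    const requestTimeoutId = setTimeout(
      () => controller.abort(),
      getTimeoutMSByModel(options.model),
    );
    let reader: ReadableStreamDefaultReader<Uint8Array> | undefined;
    try {
      const res = await fetch(speechPath, speechPayload);
      // ... res.ok / res.body checks, then read the SSE stream ...
      reader = res.body!.getReader();
    } finally {
      clearTimeout(requestTimeoutId); // runs on success, error, and abort alike
      reader?.releaseLock();
    }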
+      // make a fetch request
+      const requestTimeoutId = setTimeout(
+        () => controller.abort(),
+        getTimeoutMSByModel(options.model),
+      );
+
+      const res = await fetch(speechPath, speechPayload);
+      clearTimeout(requestTimeoutId); // Clear timeout on successful connection
+
+      const reader = res.body!.getReader();
+      const decoder = new TextDecoder();
+      let buffer = "";
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) {
+          break;
+        }
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split("\n");
+        buffer = lines.pop() || "";
+
+        for (const line of lines) {
+          const data = line.slice(5);
+          try {
+            if (line.startsWith("data:")) {
+              const json = JSON.parse(data);
+              if (json.output?.audio?.data) {
+                yield this.PCMBase64ToAudioBuffer(json.output.audio.data);
+              }
+            }
+          } catch (parseError) {
+            console.warn(
+              "[StreamSpeech] Failed to parse SSE data:",
+              parseError,
+            );
+            continue;
+          }
+        }
+      }
+      reader.releaseLock();
+    } catch (e) {
+      console.log("[Request] failed to make a speech request", e);
+      throw e;
+    }
+  }
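To illustrate how a caller might consume this generator, a minimal playback sketch (playStream and its scheduling are hypothetical; streamSpeech, SpeechOptions, and the yielded AudioBuffer chunks are from the PR):

    async function playStream(api: QwenApi, options: SpeechOptions) {
      const ctx = new AudioContext();
      let playAt = ctx.currentTime;
      for await (const chunk of api.streamSpeech(options)) {
        const source = ctx.createBufferSource();
        source.buffer = chunk;
        source.connect(ctx.destination);
        source.start(playAt); // schedule chunks back to back, gap-free
        playAt = Math.max(playAt, ctx.currentTime) + chunk.duration;
      }
    }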
   async chat(options: ChatOptions) {
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
@@ -129,6 +206,7 @@ export class QwenApi implements LLMApi {
         temperature: modelConfig.temperature,
         // max_tokens: modelConfig.max_tokens,
         top_p: modelConfig.top_p === 1 ? 0.99 : modelConfig.top_p, // qwen top_p should be < 1
+        enable_search: modelConfig.enableNetWork,
       },
     };
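For reference, an abridged sketch of the JSON body this builds for DashScope (field values are illustrative and the field set is abridged; the enable_search flag gated by modelConfig.enableNetWork is the PR's addition):

    {
      "model": "qwen-turbo",
      "input": { "messages": [ /* chat history */ ] },
      "parameters": {
        "temperature": 0.5,
        "top_p": 0.99,
        "enable_search": true,
        "tools": [ /* FunctionToolItem[] when plugins are active */ ]
      }
    }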
@@ -161,11 +239,16 @@ export class QwenApi implements LLMApi {
         .getAsTools(
           useChatStore.getState().currentSession().mask?.plugin || [],
         );
+      // console.log("getAsTools", tools, funcs);
+      const _tools = tools as unknown as FunctionToolItem[];
+      if (_tools && _tools.length > 0) {
+        requestPayload.parameters.tools = _tools;
+      }
       return streamWithThink(
         chatPath,
         requestPayload,
         headers,
-        tools as any,
+        [],
         funcs,
         controller,
         // parseSSE
@@ -198,7 +281,7 @@ export class QwenApi implements LLMApi {
           });
         } else {
           // @ts-ignore
-          runTools[index]["function"]["arguments"] += args;
+          runTools[index]["function"]["arguments"] += args || "";
         }
       }
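The `|| ""` guard matters because a streamed tool-call delta may omit the arguments field, and appending undefined to a string coerces it to the literal text "undefined" — a quick illustration:

    let args: string | undefined; // a delta chunk may not carry arguments
    let acc = "";
    acc += args;       // acc === "undefined" — corrupts the accumulated JSON
    acc = "";
    acc += args || ""; // acc stays "" — the PR's guard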
@@ -273,5 +356,79 @@ export class QwenApi implements LLMApi {
   async models(): Promise<LLMModel[]> {
     return [];
   }
+  // Decode base64 PCM data for playback
+  private async PCMBase64ToAudioBuffer(base64Data: string) {
+    try {
+      // decode base64
+      const binaryString = atob(base64Data);
+      const bytes = new Uint8Array(binaryString.length);
+      for (let i = 0; i < binaryString.length; i++) {
+        bytes[i] = binaryString.charCodeAt(i);
+      }
+
+      // convert to AudioBuffer
+      const audioBuffer = await this.convertToAudioBuffer(bytes);
+
+      return audioBuffer;
+    } catch (error) {
+      console.error("Failed to play PCM data:", error);
+      throw error;
+    }
+  }
+
+  private static getAudioContext(): AudioContext {
+    if (!QwenApi.audioContext) {
+      QwenApi.audioContext = new (window.AudioContext ||
+        window.webkitAudioContext)();
+    }
+    return QwenApi.audioContext;
+  }
+
+  // Convert PCM byte data to an AudioBuffer
+  private convertToAudioBuffer(pcmData: Uint8Array) {
+    const audioContext = QwenApi.getAudioContext();
+    const channels = 1;
+    const sampleRate = 24000;
+    return new Promise<AudioBuffer>((resolve, reject) => {
+      try {
+        let float32Array;
+        // convert 16-bit PCM to 32-bit float
+        float32Array = this.pcm16ToFloat32(pcmData);
+
+        // create the AudioBuffer
+        const audioBuffer = audioContext.createBuffer(
+          channels,
+          float32Array.length / channels,
+          sampleRate,
+        );
+
+        // copy the data into the AudioBuffer
+        for (let channel = 0; channel < channels; channel++) {
+          const channelData = audioBuffer.getChannelData(channel);
+          for (let i = 0; i < channelData.length; i++) {
+            channelData[i] = float32Array[i * channels + channel];
+          }
+        }
+
+        resolve(audioBuffer);
+      } catch (error) {
+        reject(error);
+      }
+    });
+  }
+  // 16-bit PCM to 32-bit float
+  private pcm16ToFloat32(pcmData: Uint8Array) {
+    const length = pcmData.length / 2;
+    const float32Array = new Float32Array(length);
+
+    for (let i = 0; i < length; i++) {
+      const int16 = (pcmData[i * 2 + 1] << 8) | pcmData[i * 2];
+      const int16Signed = int16 > 32767 ? int16 - 65536 : int16;
+      float32Array[i] = int16Signed / 32768;
+    }
+
+    return float32Array;
+  }
 }
 export { Alibaba };
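As a cross-check on the byte handling above, an equivalent decode using DataView, which makes the little-endian assumption explicit (a sketch only; the PR's manual bit-shifting achieves the same result):

    function pcm16ToFloat32Alt(pcm: Uint8Array): Float32Array {
      const view = new DataView(pcm.buffer, pcm.byteOffset, pcm.byteLength);
      const out = new Float32Array(Math.floor(pcm.byteLength / 2));
      for (let i = 0; i < out.length; i++) {
        // true = little-endian, matching (pcm[i*2+1] << 8) | pcm[i*2]
        out[i] = view.getInt16(i * 2, true) / 32768;
      }
      return out;
    }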