Skip to content

Commit cea97f1

Browse files
committed
Merge: upstream: songquanpeng#1497
1 parent 86cef78 commit cea97f1

File tree

26 files changed

+587
-40
lines changed

26 files changed

+587
-40
lines changed

controller/relay.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func relayHelper(c *gin.Context, relayMode int) *model.ErrorWithStatusCode {
2929
case relaymode.ImagesGenerations:
3030
err = controller.RelayImageHelper(c, relayMode)
3131
case relaymode.AudioSpeech:
32-
fallthrough
32+
err = controller.RelayAudioSpeechHelper(c)
3333
case relaymode.AudioTranslation:
3434
fallthrough
3535
case relaymode.AudioTranscription:

relay/adaptor/aiproxy/adaptor.go

+10-2
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ package aiproxy
33
import (
44
"errors"
55
"fmt"
6+
"io"
7+
"net/http"
8+
69
"github.com/gin-gonic/gin"
710
"github.com/songquanpeng/one-api/relay/adaptor"
811
"github.com/songquanpeng/one-api/relay/meta"
912
"github.com/songquanpeng/one-api/relay/model"
10-
"io"
11-
"net/http"
1213
)
1314

1415
type Adaptor struct {
@@ -45,6 +46,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
4546
return request, nil
4647
}
4748

49+
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
50+
if request == nil {
51+
return nil, errors.New("request is nil")
52+
}
53+
return request, nil
54+
}
55+
4856
// DoRequest hands the request to adaptor.DoRequestHelper, passing this adaptor,
// the gin context, the relay meta and the request body, and returns the
// helper's response and error unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
4957
return adaptor.DoRequestHelper(a, c, meta, requestBody)
5058
}

relay/adaptor/ali/adaptor.go

+90-2
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
package ali
22

33
import (
4+
"encoding/json"
45
"errors"
56
"fmt"
7+
"io"
8+
"net/http"
9+
610
"github.com/gin-gonic/gin"
11+
"github.com/gorilla/websocket"
712
"github.com/songquanpeng/one-api/relay/adaptor"
813
"github.com/songquanpeng/one-api/relay/meta"
914
"github.com/songquanpeng/one-api/relay/model"
1015
"github.com/songquanpeng/one-api/relay/relaymode"
11-
"io"
12-
"net/http"
1316
)
1417

1518
// https://help.aliyun.com/zh/dashscope/developer-reference/api-details
@@ -76,7 +79,19 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
7679
return aliRequest, nil
7780
}
7881

82+
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
83+
if request == nil {
84+
return nil, errors.New("request is nil")
85+
}
86+
87+
aliRequest := ConvertTextToSpeechRequest(*request)
88+
return aliRequest, nil
89+
}
90+
7991
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
92+
if meta.Mode == relaymode.AudioSpeech {
93+
return a.DoWSSRequest(c, meta, requestBody)
94+
}
8095
return adaptor.DoRequestHelper(a, c, meta, requestBody)
8196
}
8297

@@ -89,6 +104,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Met
89104
err, usage = EmbeddingHandler(c, resp)
90105
case relaymode.ImagesGenerations:
91106
err, usage = ImageHandler(c, resp)
107+
case relaymode.AudioSpeech:
108+
err, usage = AudioSpeechHandler(c, resp)
92109
default:
93110
err, usage = Handler(c, resp)
94111
}
@@ -103,3 +120,74 @@ func (a *Adaptor) GetModelList() []string {
103120
// GetChannelName returns the fixed channel identifier of this adaptor, "ali".
func (a *Adaptor) GetChannelName() string {
104121
return "ali"
105122
}
123+
124+
func (a *Adaptor) DoWSSRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
125+
baseURL := "wss://dashscope.aliyuncs.com/api-ws/v1/inference"
126+
var usage Usage
127+
// Create an empty http.Response object
128+
response := &http.Response{
129+
StatusCode: http.StatusInternalServerError,
130+
Body: io.NopCloser(nil),
131+
}
132+
133+
conn, _, err := websocket.DefaultDialer.Dial(baseURL, http.Header{"Authorization": {"Bearer " + meta.APIKey}})
134+
if err != nil {
135+
return response, errors.New("ali_wss_conn_failed")
136+
}
137+
defer conn.Close()
138+
139+
var requestBodyBytes []byte
140+
requestBodyBytes, err = io.ReadAll(requestBody)
141+
if err != nil {
142+
return response, errors.New("ali_failed_to_read_request_body")
143+
}
144+
145+
// Convert JSON strings to map[string]interface{}
146+
var requestBodyMap map[string]interface{}
147+
err = json.Unmarshal(requestBodyBytes, &requestBodyMap)
148+
if err != nil {
149+
return response, errors.New("ali_failed_to_parse_request_body")
150+
}
151+
152+
if err := conn.WriteJSON(requestBodyMap); err != nil {
153+
return response, errors.New("ali_wss_write_msg_failed")
154+
}
155+
156+
const chunkSize = 1024
157+
158+
for {
159+
messageType, audioData, err := conn.ReadMessage()
160+
if err != nil {
161+
if err == io.EOF {
162+
break
163+
}
164+
return response, errors.New("ali_wss_read_msg_failed")
165+
}
166+
167+
var msg WSSMessage
168+
switch messageType {
169+
case websocket.TextMessage:
170+
err = json.Unmarshal(audioData, &msg)
171+
if msg.Header.Event == "task-finished" {
172+
response.StatusCode = http.StatusOK
173+
usage.TotalTokens = msg.Payload.Usage.Characters
174+
return response, nil
175+
}
176+
case websocket.BinaryMessage:
177+
for i := 0; i < len(audioData); i += chunkSize {
178+
end := i + chunkSize
179+
if end > len(audioData) {
180+
end = len(audioData)
181+
}
182+
chunk := audioData[i:end]
183+
184+
_, writeErr := c.Writer.Write(chunk)
185+
if writeErr != nil {
186+
return response, errors.New("wss_write_chunk_failed")
187+
}
188+
}
189+
}
190+
}
191+
192+
return response, nil
193+
}

relay/adaptor/ali/audio-speech.go

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package ali
2+
3+
import (
4+
"net/http"
5+
6+
"github.com/gin-gonic/gin"
7+
"github.com/songquanpeng/one-api/relay/adaptor/openai"
8+
"github.com/songquanpeng/one-api/relay/model"
9+
)
10+
11+
func AudioSpeechHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
12+
for k, v := range resp.Header {
13+
c.Writer.Header().Set(k, v[0])
14+
}
15+
c.Writer.WriteHeader(resp.StatusCode)
16+
17+
err := resp.Body.Close()
18+
if err != nil {
19+
return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
20+
}
21+
return nil, nil
22+
}

relay/adaptor/ali/constants.go

+44
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,48 @@ var ModelList = []string{
44
"qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext",
55
"text-embedding-v1",
66
"ali-stable-diffusion-xl", "ali-stable-diffusion-v1.5", "wanx-v1",
7+
8+
"sambert-zhichu-v1",
9+
"sambert-zhiwei-v1",
10+
"sambert-zhixiang-v1",
11+
"sambert-zhide-v1",
12+
"sambert-zhijia-v1",
13+
"sambert-zhinan-v1",
14+
"sambert-zhiqi-v1",
15+
"sambert-zhiqian-v1",
16+
"sambert-zhiru-v1",
17+
"sambert-zhimiao-emo-v1",
18+
"sambert-zhida-v1",
19+
"sambert-zhifei-v1",
20+
"sambert-zhigui-v1",
21+
"sambert-zhihao-v1",
22+
"sambert-zhijing-v1",
23+
"sambert-zhilun-v1",
24+
"sambert-zhimao-v1",
25+
"sambert-zhiming-v1",
26+
"sambert-zhimo-v1",
27+
"sambert-zhina-v1",
28+
"sambert-zhishu-v1",
29+
"sambert-zhishuo-v1",
30+
"sambert-zhistella-v1",
31+
"sambert-zhiting-v1",
32+
"sambert-zhixiao-v1",
33+
"sambert-zhiya-v1",
34+
"sambert-zhiye-v1",
35+
"sambert-zhiying-v1",
36+
"sambert-zhiyuan-v1",
37+
"sambert-zhiyue-v1",
38+
"sambert-camila-v1",
39+
"sambert-perla-v1",
40+
"sambert-indah-v1",
41+
"sambert-clara-v1",
42+
"sambert-hanna-v1",
43+
"sambert-beth-v1",
44+
"sambert-betty-v1",
45+
"sambert-cally-v1",
46+
"sambert-cindy-v1",
47+
"sambert-eva-v1",
48+
"sambert-donna-v1",
49+
"sambert-brian-v1",
50+
"sambert-waan-v1",
751
}

relay/adaptor/ali/main.go

+32
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"strings"
99

1010
"github.com/gin-gonic/gin"
11+
"github.com/google/uuid"
1112
"github.com/songquanpeng/one-api/common"
1213
"github.com/songquanpeng/one-api/common/helper"
1314
"github.com/songquanpeng/one-api/common/logger"
@@ -78,6 +79,37 @@ func ConvertImageRequest(request model.ImageRequest) *ImageRequest {
7879
return &imageRequest
7980
}
8081

82+
func ConvertTextToSpeechRequest(request model.TextToSpeechRequest) *WSSMessage {
83+
var ttsRequest WSSMessage
84+
ttsRequest.Header.Action = "run-task"
85+
ttsRequest.Header.Streaming = "out"
86+
ttsRequest.Header.TaskID = uuid.New().String()
87+
ttsRequest.Payload.Function = "SpeechSynthesizer"
88+
ttsRequest.Payload.Input.Text = request.Input
89+
ttsRequest.Payload.Model = request.Model
90+
ttsRequest.Payload.Parameters.Format = "wav"
91+
//ttsRequest.Payload.Parameters.SampleRate = 48000
92+
ttsRequest.Payload.Parameters.Rate = 1.0
93+
ttsRequest.Payload.Task = "tts"
94+
ttsRequest.Payload.TaskGroup = "audio"
95+
96+
format := map[string]bool{
97+
"pcm": true,
98+
"wav": true,
99+
"mp3": true,
100+
}
101+
102+
if _, ok := format[request.ResponseFormat]; ok {
103+
ttsRequest.Payload.Parameters.Format = request.ResponseFormat
104+
}
105+
106+
if 0.5 <= request.Speed && request.Speed <= 2 {
107+
ttsRequest.Payload.Parameters.Rate = request.Speed
108+
}
109+
110+
return &ttsRequest
111+
}
112+
81113
func EmbeddingHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusCode, *model.Usage) {
82114
var aliResponse EmbeddingResponse
83115
err := json.NewDecoder(resp.Body).Decode(&aliResponse)

relay/adaptor/anthropic/adaptor.go

+10-2
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ package anthropic
33
import (
44
"errors"
55
"fmt"
6+
"io"
7+
"net/http"
8+
69
"github.com/gin-gonic/gin"
710
"github.com/songquanpeng/one-api/relay/adaptor"
811
"github.com/songquanpeng/one-api/relay/meta"
912
"github.com/songquanpeng/one-api/relay/model"
10-
"io"
11-
"net/http"
1213
)
1314

1415
type Adaptor struct {
@@ -48,6 +49,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
4849
return request, nil
4950
}
5051

52+
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
53+
if request == nil {
54+
return nil, errors.New("request is nil")
55+
}
56+
return request, nil
57+
}
58+
5159
// DoRequest hands the request to adaptor.DoRequestHelper, passing this adaptor,
// the gin context, the relay meta and the request body, and returns the
// helper's response and error unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
5260
return adaptor.DoRequestHelper(a, c, meta, requestBody)
5361
}

relay/adaptor/aws/adapter.go

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
package aws
22

33
import (
4+
"io"
5+
"net/http"
6+
47
"github.com/aws/aws-sdk-go-v2/aws"
58
"github.com/aws/aws-sdk-go-v2/credentials"
69
"github.com/aws/aws-sdk-go-v2/service/bedrockruntime"
710
"github.com/songquanpeng/one-api/common/ctxkey"
8-
"io"
9-
"net/http"
1011

1112
"github.com/gin-gonic/gin"
1213
"github.com/pkg/errors"
@@ -57,6 +58,13 @@ func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error)
5758
return request, nil
5859
}
5960

61+
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
62+
if request == nil {
63+
return nil, errors.New("request is nil")
64+
}
65+
return request, nil
66+
}
67+
6068
// DoRequest is a stub in this adaptor: it ignores all of its arguments,
// performs no I/O, and always returns a nil response together with a nil error.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
6169
return nil, nil
6270
}

relay/adaptor/baidu/adaptor.go

+10-2
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ package baidu
33
import (
44
"errors"
55
"fmt"
6-
"github.com/songquanpeng/one-api/relay/meta"
7-
"github.com/songquanpeng/one-api/relay/relaymode"
86
"io"
97
"net/http"
108
"strings"
119

10+
"github.com/songquanpeng/one-api/relay/meta"
11+
"github.com/songquanpeng/one-api/relay/relaymode"
12+
1213
"github.com/gin-gonic/gin"
1314
"github.com/songquanpeng/one-api/relay/adaptor"
1415
"github.com/songquanpeng/one-api/relay/model"
@@ -120,6 +121,13 @@ func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Read
120121
return adaptor.DoRequestHelper(a, c, meta, requestBody)
121122
}
122123

124+
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
125+
if request == nil {
126+
return nil, errors.New("request is nil")
127+
}
128+
return request, nil
129+
}
130+
123131
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
124132
if meta.IsStream {
125133
err, usage = StreamHandler(c, resp)

relay/adaptor/cloudflare/adaptor.go

+7
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,13 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
5050
}
5151
}
5252

53+
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
54+
if request == nil {
55+
return nil, errors.New("request is nil")
56+
}
57+
return request, nil
58+
}
59+
5360
// DoRequest hands the request to adaptor.DoRequestHelper, passing this adaptor,
// the gin context, the relay meta and the request body, and returns the
// helper's response and error unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
5461
return adaptor.DoRequestHelper(a, c, meta, requestBody)
5562
}

relay/adaptor/cohere/adaptor.go

+7
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
4242
return ConvertRequest(*request), nil
4343
}
4444

45+
func (a *Adaptor) ConvertTextToSpeechRequest(request *model.TextToSpeechRequest) (any, error) {
46+
if request == nil {
47+
return nil, errors.New("request is nil")
48+
}
49+
return request, nil
50+
}
51+
4552
// DoRequest hands the request to adaptor.DoRequestHelper, passing this adaptor,
// the gin context, the relay meta and the request body, and returns the
// helper's response and error unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
4653
return adaptor.DoRequestHelper(a, c, meta, requestBody)
4754
}

0 commit comments

Comments
 (0)