From 9d7c3c0841b34688fe60d978e33486637c92e828 Mon Sep 17 00:00:00 2001
From: Wei Zhang
Date: Sun, 5 Jan 2025 01:03:59 +0800
Subject: [PATCH] refactor: use async-openai-alt and upgrade to 0.26.1

Signed-off-by: Wei Zhang
---
 Cargo.lock | 36 ++++++-------
 Cargo.toml | 2 +-
 crates/http-api-bindings/Cargo.toml | 2 +-
 crates/http-api-bindings/src/chat/mod.rs | 4 +-
 crates/http-api-bindings/src/rate_limit.rs | 2 +-
 crates/llama-cpp-server/Cargo.toml | 2 +-
 crates/llama-cpp-server/src/lib.rs | 10 ++--
 crates/tabby-inference/Cargo.toml | 2 +-
 crates/tabby-inference/src/chat.rs | 6 +--
 crates/tabby/Cargo.toml | 2 +-
 crates/tabby/src/routes/chat.rs | 4 +-
 ee/tabby-schema/Cargo.toml | 2 +-
 ee/tabby-schema/src/schema/mod.rs | 2 +-
 ee/tabby-webserver/Cargo.toml | 2 +-
 ee/tabby-webserver/src/service/answer.rs | 54 +++++++++++--------
 .../src/service/answer/testutils/mod.rs | 4 +-
 16 files changed, 73 insertions(+), 63 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fe613095e0b4..08a3bb69bd61 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -187,16 +187,17 @@ dependencies = [
 ]
 
 [[package]]
-name = "async-openai"
-version = "0.20.0"
+name = "async-openai-alt"
+version = "0.26.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11e97f9c5e0ee3260caee9700ba1bb61a6fdc34d2b6786a31e018c5de5198491"
+checksum = "2df183306e5fa71c7a5af4571e10504806a3d47825e172824fddee8ed9182cbf"
 dependencies = [
  "async-convert",
  "backoff",
  "base64 0.22.1",
  "bytes",
  "derive_builder",
+ "eventsource-stream",
  "futures",
  "rand 0.8.5",
  "reqwest",
@@ -1915,7 +1916,7 @@ name = "http-api-bindings"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "futures",
@@ -2608,7 +2609,7 @@ name = "llama-cpp-server"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-trait",
  "cmake",
  "futures",
@@ -2872,13 +2873,13 @@ dependencies = [
 
 [[package]]
 name = "mio"
-version = "0.8.11"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
+checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
 dependencies = [
  "libc",
  "wasi 0.11.0+wasi-snapshot-preview1",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -5266,7 +5267,7 @@ version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
  "assert-json-diff",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "axum",
@@ -5482,7 +5483,7 @@ name = "tabby-inference"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "dashmap",
@@ -5500,7 +5501,7 @@ name = "tabby-schema"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-trait",
  "axum",
  "base64 0.22.1",
@@ -5529,7 +5530,7 @@ dependencies = [
  "anyhow",
  "argon2",
  "assert_matches",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "axum",
@@ -5888,28 +5889,27 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.37.0"
+version = "1.42.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787"
+checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551"
 dependencies = [
  "backtrace",
  "bytes",
  "libc",
  "mio",
- "num_cpus",
  "parking_lot",
  "pin-project-lite",
"signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 99f2d05c9bb6..865bbace6bcf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,7 +68,7 @@ mime_guess = "2.0.4" assert_matches = "1.5" insta = "1.34.0" logkit = "0.3" -async-openai = "0.20" +async-openai-alt = "0.26.1" tracing-test = "0.2" clap = "4.3.0" ratelimit = "0.10" diff --git a/crates/http-api-bindings/Cargo.toml b/crates/http-api-bindings/Cargo.toml index 03dd3861241e..5fc6c44f1a27 100644 --- a/crates/http-api-bindings/Cargo.toml +++ b/crates/http-api-bindings/Cargo.toml @@ -17,7 +17,7 @@ serde_json = { workspace = true } tabby-common = { path = "../tabby-common" } tabby-inference = { path = "../tabby-inference" } ollama-api-bindings = { path = "../ollama-api-bindings" } -async-openai.workspace = true +async-openai-alt.workspace = true tokio.workspace = true tracing.workspace = true leaky-bucket = "1.1.2" diff --git a/crates/http-api-bindings/src/chat/mod.rs b/crates/http-api-bindings/src/chat/mod.rs index f30a36ed1dca..bed9a15a6fa2 100644 --- a/crates/http-api-bindings/src/chat/mod.rs +++ b/crates/http-api-bindings/src/chat/mod.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use async_openai::config::OpenAIConfig; +use async_openai_alt::config::OpenAIConfig; use tabby_common::config::HttpModelConfig; use tabby_inference::{ChatCompletionStream, ExtendedOpenAIConfig}; @@ -34,7 +34,7 @@ pub async fn create(model: &HttpModelConfig) -> Arc { let config = builder.build().expect("Failed to build config"); let engine = Box::new( - async_openai::Client::with_config(config) + async_openai_alt::Client::with_config(config) .with_http_client(create_reqwest_client(api_endpoint)), ); diff --git a/crates/http-api-bindings/src/rate_limit.rs b/crates/http-api-bindings/src/rate_limit.rs index 5636986f4495..6f30a617b4c5 100644 --- a/crates/http-api-bindings/src/rate_limit.rs +++ b/crates/http-api-bindings/src/rate_limit.rs @@ -1,4 +1,4 @@ -use async_openai::{ +use async_openai_alt::{ error::OpenAIError, types::{ ChatCompletionResponseStream, CreateChatCompletionRequest, CreateChatCompletionResponse, diff --git a/crates/llama-cpp-server/Cargo.toml b/crates/llama-cpp-server/Cargo.toml index 43d2b5223beb..4ff119a97d72 100644 --- a/crates/llama-cpp-server/Cargo.toml +++ b/crates/llama-cpp-server/Cargo.toml @@ -24,7 +24,7 @@ anyhow.workspace = true which = "6" serde.workspace = true serdeconv.workspace = true -async-openai.workspace = true +async-openai-alt.workspace = true [build-dependencies] cmake = "0.1" diff --git a/crates/llama-cpp-server/src/lib.rs b/crates/llama-cpp-server/src/lib.rs index a7c0a30b71cb..9851bb3dfbf3 100644 --- a/crates/llama-cpp-server/src/lib.rs +++ b/crates/llama-cpp-server/src/lib.rs @@ -3,7 +3,7 @@ mod supervisor; use std::{path::PathBuf, sync::Arc}; use anyhow::Result; -use async_openai::error::OpenAIError; +use async_openai_alt::error::OpenAIError; use async_trait::async_trait; use futures::stream::BoxStream; use serde::Deserialize; @@ -161,15 +161,15 @@ impl ChatCompletionServer { impl ChatCompletionStream for ChatCompletionServer { async fn chat( &self, - request: async_openai::types::CreateChatCompletionRequest, 
-    ) -> Result<async_openai::types::CreateChatCompletionResponse, OpenAIError> {
+        request: async_openai_alt::types::CreateChatCompletionRequest,
+    ) -> Result<async_openai_alt::types::CreateChatCompletionResponse, OpenAIError> {
         self.chat_completion.chat(request).await
     }
 
     async fn chat_stream(
         &self,
-        request: async_openai::types::CreateChatCompletionRequest,
-    ) -> Result<async_openai::types::ChatCompletionResponseStream, OpenAIError> {
+        request: async_openai_alt::types::CreateChatCompletionRequest,
+    ) -> Result<async_openai_alt::types::ChatCompletionResponseStream, OpenAIError> {
         self.chat_completion.chat_stream(request).await
     }
 }
diff --git a/crates/tabby-inference/Cargo.toml b/crates/tabby-inference/Cargo.toml
index c362b809d0dd..6b9854ba848d 100644
--- a/crates/tabby-inference/Cargo.toml
+++ b/crates/tabby-inference/Cargo.toml
@@ -16,7 +16,7 @@ derive_builder.workspace = true
 futures = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 trie-rs = "0.1.1"
-async-openai.workspace = true
+async-openai-alt.workspace = true
 secrecy = "0.8"
 reqwest.workspace = true
 tracing.workspace = true
diff --git a/crates/tabby-inference/src/chat.rs b/crates/tabby-inference/src/chat.rs
index 5ef447daaa3a..ff3b2d1672d3 100644
--- a/crates/tabby-inference/src/chat.rs
+++ b/crates/tabby-inference/src/chat.rs
@@ -1,4 +1,4 @@
-use async_openai::{
+use async_openai_alt::{
     config::OpenAIConfig,
     error::OpenAIError,
     types::{
@@ -85,7 +85,7 @@ impl ExtendedOpenAIConfig {
     }
 }
 
-impl async_openai::config::Config for ExtendedOpenAIConfig {
+impl async_openai_alt::config::Config for ExtendedOpenAIConfig {
     fn headers(&self) -> reqwest::header::HeaderMap {
         self.base.headers()
     }
@@ -108,7 +108,7 @@ impl async_openai::config::Config for ExtendedOpenAIConfig {
 }
 
 #[async_trait]
-impl ChatCompletionStream for async_openai::Client<ExtendedOpenAIConfig> {
+impl ChatCompletionStream for async_openai_alt::Client<ExtendedOpenAIConfig> {
     async fn chat(
         &self,
         request: CreateChatCompletionRequest,
diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml
index be26919f1038..0aa663557e92 100644
--- a/crates/tabby/Cargo.toml
+++ b/crates/tabby/Cargo.toml
@@ -59,7 +59,7 @@ axum-prometheus = "0.6"
 uuid.workspace = true
 color-eyre = { version = "0.6.3" }
 reqwest.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 spinners = "4.1.1"
 regex.workspace = true
 
diff --git a/crates/tabby/src/routes/chat.rs b/crates/tabby/src/routes/chat.rs
index d8a1f84d81d6..95ce25fcccbf 100644
--- a/crates/tabby/src/routes/chat.rs
+++ b/crates/tabby/src/routes/chat.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use async_openai::error::OpenAIError;
+use async_openai_alt::error::OpenAIError;
 use axum::{
     extract::State,
     response::sse::{Event, KeepAlive, Sse},
@@ -36,7 +36,7 @@ pub async fn chat_completions_utoipa(_request: Json) -> Statu
 pub async fn chat_completions(
     State(state): State>,
     TypedHeader(MaybeUser(user)): TypedHeader<MaybeUser>,
-    Json(mut request): Json<async_openai::types::CreateChatCompletionRequest>,
+    Json(mut request): Json<async_openai_alt::types::CreateChatCompletionRequest>,
 ) -> Result>>, StatusCode> {
     if let Some(user) = user {
         request.user.replace(user);
diff --git a/ee/tabby-schema/Cargo.toml b/ee/tabby-schema/Cargo.toml
index a6849542d615..b12e6b9e52cd 100644
--- a/ee/tabby-schema/Cargo.toml
+++ b/ee/tabby-schema/Cargo.toml
@@ -10,7 +10,7 @@ schema-language = ["juniper/schema-language"]
 
 [dependencies]
 anyhow.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 async-trait.workspace = true
 axum = { workspace = true }
 base64 = "0.22.0"
diff --git a/ee/tabby-schema/src/schema/mod.rs b/ee/tabby-schema/src/schema/mod.rs
index c6c75e6e954d..6424a9a1f8ad 100644
--- a/ee/tabby-schema/src/schema/mod.rs
+++ b/ee/tabby-schema/src/schema/mod.rs
@@ -20,7 +20,7 @@ pub mod worker;
 use std::{sync::Arc, time::Instant};
 
 use access_policy::{AccessPolicyService, SourceIdAccessPolicy};
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
         ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs,
diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml
index 75afa8fae5bf..e253dc8ce6bd 100644
--- a/ee/tabby-webserver/Cargo.toml
+++ b/ee/tabby-webserver/Cargo.toml
@@ -53,7 +53,7 @@ strum.workspace = true
 cron = "0.12.1"
 async-stream.workspace = true
 logkit.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 ratelimit.workspace = true
 cached.workspace = true
 
diff --git a/ee/tabby-webserver/src/service/answer.rs b/ee/tabby-webserver/src/service/answer.rs
index 821b6c5a5a1c..f355f61f93e9 100644
--- a/ee/tabby-webserver/src/service/answer.rs
+++ b/ee/tabby-webserver/src/service/answer.rs
@@ -7,11 +7,14 @@ use std::{
 };
 
 use anyhow::anyhow;
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
+        ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent,
         ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
-        ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs, Role,
+        ChatCompletionRequestSystemMessageContent, ChatCompletionRequestUserMessage,
+        ChatCompletionRequestUserMessageArgs, ChatCompletionRequestUserMessageContent,
+        CreateChatCompletionRequestArgs, Role,
     },
 };
 use async_stream::stream;
@@ -438,8 +441,9 @@ fn convert_messages_to_chat_completion_request(
     if !config.system_prompt.is_empty() {
         output.push(ChatCompletionRequestMessage::System(
             ChatCompletionRequestSystemMessage {
-                content: config.system_prompt.clone(),
-                role: Role::System,
+                content: ChatCompletionRequestSystemMessageContent::Text(
+                    config.system_prompt.clone(),
+                ),
                 name: None,
             },
         ));
@@ -452,36 +456,42 @@ fn convert_messages_to_chat_completion_request(
             thread::Role::User => Role::User,
         };
 
-        let content = if role == Role::User {
+        let message: ChatCompletionRequestMessage = if role == Role::User {
             if i % 2 != 0 {
                 bail!("User message must be followed by assistant message");
             }
 
             let y = &messages[i + 1];
 
-            build_user_prompt(&x.content, &y.attachment, None)
+            let content = build_user_prompt(&x.content, &y.attachment, None);
+            ChatCompletionRequestMessage::User(ChatCompletionRequestUserMessage {
+                content: ChatCompletionRequestUserMessageContent::Text(
+                    helper.rewrite_tag(&content),
+                ),
+                ..Default::default()
+            })
         } else {
-            x.content.clone()
+            ChatCompletionRequestMessage::Assistant(ChatCompletionRequestAssistantMessage {
+                content: Some(ChatCompletionRequestAssistantMessageContent::Text(
+                    x.content.clone(),
+                )),
+                ..Default::default()
+            })
         };
 
-        output.push(ChatCompletionRequestMessage::System(
-            ChatCompletionRequestSystemMessage {
-                content: helper.rewrite_tag(&content),
-                role,
-                name: None,
-            },
-        ));
+        output.push(message);
     }
 
-    output.push(ChatCompletionRequestMessage::System(
-        ChatCompletionRequestSystemMessage {
-            content: helper.rewrite_tag(&build_user_prompt(
-                &messages[messages.len() - 1].content,
-                attachment,
-                user_attachment_input,
+    output.push(ChatCompletionRequestMessage::User(
+        ChatCompletionRequestUserMessage {
+            content: ChatCompletionRequestUserMessageContent::Text(helper.rewrite_tag(
+                &build_user_prompt(
+                    &messages[messages.len() - 1].content,
+                    attachment,
+                    user_attachment_input,
+                ),
             )),
-            role: Role::User,
-            name: None,
+            ..Default::default()
         },
     ));
 
diff --git a/ee/tabby-webserver/src/service/answer/testutils/mod.rs b/ee/tabby-webserver/src/service/answer/testutils/mod.rs
index a189f6cecb7b..1ab247a49398 100644
--- a/ee/tabby-webserver/src/service/answer/testutils/mod.rs
+++ b/ee/tabby-webserver/src/service/answer/testutils/mod.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
         ChatChoice, ChatChoiceStream, ChatCompletionResponseMessage, ChatCompletionResponseStream,
@@ -44,7 +44,7 @@ impl ChatCompletionStream for FakeChatCompletionStream {
         _request: CreateChatCompletionRequest,
     ) -> Result<CreateChatCompletionResponse, OpenAIError> {
         if self.return_error {
-            return Err(OpenAIError::ApiError(async_openai::error::ApiError {
+            return Err(OpenAIError::ApiError(async_openai_alt::error::ApiError {
                 message: "error".to_string(),
                 code: None,
                 param: None,
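
The substance of this migration, visible in the answer.rs hunk above, is that async-openai-alt 0.26 no longer models a request message as a plain String content plus an explicit role field: the role is now carried by the ChatCompletionRequestMessage enum variant, and the content by per-role content enums such as ChatCompletionRequestSystemMessageContent. A minimal sketch of building a request against these 0.26.1 types follows; the model name and prompt strings are placeholders for illustration, not values taken from this patch.

    use async_openai_alt::error::OpenAIError;
    use async_openai_alt::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
        ChatCompletionRequestSystemMessageContent, ChatCompletionRequestUserMessage,
        ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
        CreateChatCompletionRequestArgs,
    };

    fn build_request() -> Result<CreateChatCompletionRequest, OpenAIError> {
        let messages = vec![
            // The System variant replaces the old `role: Role::System` field;
            // content is a typed enum rather than a String.
            ChatCompletionRequestMessage::System(ChatCompletionRequestSystemMessage {
                content: ChatCompletionRequestSystemMessageContent::Text(
                    "You are a helpful coding assistant.".into(),
                ),
                name: None,
            }),
            // User messages follow the same pattern; remaining fields keep their defaults.
            ChatCompletionRequestMessage::User(ChatCompletionRequestUserMessage {
                content: ChatCompletionRequestUserMessageContent::Text("Hello!".into()),
                ..Default::default()
            }),
        ];

        CreateChatCompletionRequestArgs::default()
            .model("placeholder-model") // placeholder, not from this patch
            .messages(messages)
            .build()
    }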
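
The Cargo.lock hunk also shows async-openai-alt 0.26.1 pulling in eventsource-stream, which backs its SSE-based streaming responses. Below is a sketch, under the assumption that async-openai-alt keeps upstream async-openai's client surface (Client::with_config, chat().create, chat().create_stream), of calling the swapped-in client directly; the endpoint and API key are illustrative only, whereas Tabby itself builds the client from HttpModelConfig via ExtendedOpenAIConfig as in crates/http-api-bindings/src/chat/mod.rs.

    use async_openai_alt::{config::OpenAIConfig, error::OpenAIError, Client};
    use async_openai_alt::types::{CreateChatCompletionRequest, CreateChatCompletionResponse};

    async fn chat_once(
        request: CreateChatCompletionRequest,
    ) -> Result<CreateChatCompletionResponse, OpenAIError> {
        // Illustrative endpoint and key; not taken from this patch.
        let config = OpenAIConfig::new()
            .with_api_base("http://localhost:8080/v1")
            .with_api_key("not-a-real-key");
        let client = Client::with_config(config);

        // Non-streaming completion. `client.chat().create_stream(request)` would instead
        // return a ChatCompletionResponseStream fed by SSE, which is why the new crate
        // depends on eventsource-stream.
        client.chat().create(request).await
    }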