From 9d7c3c0841b34688fe60d978e33486637c92e828 Mon Sep 17 00:00:00 2001
From: Wei Zhang
Date: Sun, 5 Jan 2025 01:03:59 +0800
Subject: [PATCH] refactor: use async-openai-alt and upgrade to 0.26.1

Signed-off-by: Wei Zhang
---
 Cargo.lock | 36 ++++++-------
 Cargo.toml | 2 +-
 crates/http-api-bindings/Cargo.toml | 2 +-
 crates/http-api-bindings/src/chat/mod.rs | 4 +-
 crates/http-api-bindings/src/rate_limit.rs | 2 +-
 crates/llama-cpp-server/Cargo.toml | 2 +-
 crates/llama-cpp-server/src/lib.rs | 10 ++--
 crates/tabby-inference/Cargo.toml | 2 +-
 crates/tabby-inference/src/chat.rs | 6 +--
 crates/tabby/Cargo.toml | 2 +-
 crates/tabby/src/routes/chat.rs | 4 +-
 ee/tabby-schema/Cargo.toml | 2 +-
 ee/tabby-schema/src/schema/mod.rs | 2 +-
 ee/tabby-webserver/Cargo.toml | 2 +-
 ee/tabby-webserver/src/service/answer.rs | 54 +++++++++++--------
 .../src/service/answer/testutils/mod.rs | 4 +-
 16 files changed, 73 insertions(+), 63 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fe613095e0b4..08a3bb69bd61 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -187,16 +187,17 @@ dependencies = [
 ]
 
 [[package]]
-name = "async-openai"
-version = "0.20.0"
+name = "async-openai-alt"
+version = "0.26.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11e97f9c5e0ee3260caee9700ba1bb61a6fdc34d2b6786a31e018c5de5198491"
+checksum = "2df183306e5fa71c7a5af4571e10504806a3d47825e172824fddee8ed9182cbf"
 dependencies = [
  "async-convert",
  "backoff",
  "base64 0.22.1",
  "bytes",
  "derive_builder",
+ "eventsource-stream",
  "futures",
  "rand 0.8.5",
  "reqwest",
@@ -1915,7 +1916,7 @@ name = "http-api-bindings"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "futures",
@@ -2608,7 +2609,7 @@ name = "llama-cpp-server"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-trait",
  "cmake",
  "futures",
@@ -2872,13 +2873,13 @@ dependencies = [
 
 [[package]]
 name = "mio"
-version = "0.8.11"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
+checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
 dependencies = [
  "libc",
  "wasi 0.11.0+wasi-snapshot-preview1",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -5266,7 +5267,7 @@ version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
  "assert-json-diff",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "axum",
@@ -5482,7 +5483,7 @@ name = "tabby-inference"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "dashmap",
@@ -5500,7 +5501,7 @@ name = "tabby-schema"
 version = "0.24.0-dev.0"
 dependencies = [
  "anyhow",
- "async-openai",
+ "async-openai-alt",
  "async-trait",
  "axum",
  "base64 0.22.1",
@@ -5529,7 +5530,7 @@ dependencies = [
  "anyhow",
  "argon2",
  "assert_matches",
- "async-openai",
+ "async-openai-alt",
  "async-stream",
  "async-trait",
  "axum",
@@ -5888,28 +5889,27 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.37.0"
+version = "1.42.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787"
+checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551"
 dependencies = [
  "backtrace",
  "bytes",
  "libc",
  "mio",
- "num_cpus",
  "parking_lot",
  "pin-project-lite",
"signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 99f2d05c9bb6..865bbace6bcf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,7 +68,7 @@ mime_guess = "2.0.4" assert_matches = "1.5" insta = "1.34.0" logkit = "0.3" -async-openai = "0.20" +async-openai-alt = "0.26.1" tracing-test = "0.2" clap = "4.3.0" ratelimit = "0.10" diff --git a/crates/http-api-bindings/Cargo.toml b/crates/http-api-bindings/Cargo.toml index 03dd3861241e..5fc6c44f1a27 100644 --- a/crates/http-api-bindings/Cargo.toml +++ b/crates/http-api-bindings/Cargo.toml @@ -17,7 +17,7 @@ serde_json = { workspace = true } tabby-common = { path = "../tabby-common" } tabby-inference = { path = "../tabby-inference" } ollama-api-bindings = { path = "../ollama-api-bindings" } -async-openai.workspace = true +async-openai-alt.workspace = true tokio.workspace = true tracing.workspace = true leaky-bucket = "1.1.2" diff --git a/crates/http-api-bindings/src/chat/mod.rs b/crates/http-api-bindings/src/chat/mod.rs index f30a36ed1dca..bed9a15a6fa2 100644 --- a/crates/http-api-bindings/src/chat/mod.rs +++ b/crates/http-api-bindings/src/chat/mod.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use async_openai::config::OpenAIConfig; +use async_openai_alt::config::OpenAIConfig; use tabby_common::config::HttpModelConfig; use tabby_inference::{ChatCompletionStream, ExtendedOpenAIConfig}; @@ -34,7 +34,7 @@ pub async fn create(model: &HttpModelConfig) -> Arc { let config = builder.build().expect("Failed to build config"); let engine = Box::new( - async_openai::Client::with_config(config) + async_openai_alt::Client::with_config(config) .with_http_client(create_reqwest_client(api_endpoint)), ); diff --git a/crates/http-api-bindings/src/rate_limit.rs b/crates/http-api-bindings/src/rate_limit.rs index 5636986f4495..6f30a617b4c5 100644 --- a/crates/http-api-bindings/src/rate_limit.rs +++ b/crates/http-api-bindings/src/rate_limit.rs @@ -1,4 +1,4 @@ -use async_openai::{ +use async_openai_alt::{ error::OpenAIError, types::{ ChatCompletionResponseStream, CreateChatCompletionRequest, CreateChatCompletionResponse, diff --git a/crates/llama-cpp-server/Cargo.toml b/crates/llama-cpp-server/Cargo.toml index 43d2b5223beb..4ff119a97d72 100644 --- a/crates/llama-cpp-server/Cargo.toml +++ b/crates/llama-cpp-server/Cargo.toml @@ -24,7 +24,7 @@ anyhow.workspace = true which = "6" serde.workspace = true serdeconv.workspace = true -async-openai.workspace = true +async-openai-alt.workspace = true [build-dependencies] cmake = "0.1" diff --git a/crates/llama-cpp-server/src/lib.rs b/crates/llama-cpp-server/src/lib.rs index a7c0a30b71cb..9851bb3dfbf3 100644 --- a/crates/llama-cpp-server/src/lib.rs +++ b/crates/llama-cpp-server/src/lib.rs @@ -3,7 +3,7 @@ mod supervisor; use std::{path::PathBuf, sync::Arc}; use anyhow::Result; -use async_openai::error::OpenAIError; +use async_openai_alt::error::OpenAIError; use async_trait::async_trait; use futures::stream::BoxStream; use serde::Deserialize; @@ -161,15 +161,15 @@ impl ChatCompletionServer { impl ChatCompletionStream for ChatCompletionServer { async fn chat( &self, - request: async_openai::types::CreateChatCompletionRequest, 
-    ) -> Result<async_openai::types::CreateChatCompletionResponse, OpenAIError> {
+        request: async_openai_alt::types::CreateChatCompletionRequest,
+    ) -> Result<async_openai_alt::types::CreateChatCompletionResponse, OpenAIError> {
         self.chat_completion.chat(request).await
     }
 
     async fn chat_stream(
         &self,
-        request: async_openai::types::CreateChatCompletionRequest,
-    ) -> Result<async_openai::types::ChatCompletionResponseStream, OpenAIError> {
+        request: async_openai_alt::types::CreateChatCompletionRequest,
+    ) -> Result<async_openai_alt::types::ChatCompletionResponseStream, OpenAIError> {
         self.chat_completion.chat_stream(request).await
     }
 }
diff --git a/crates/tabby-inference/Cargo.toml b/crates/tabby-inference/Cargo.toml
index c362b809d0dd..6b9854ba848d 100644
--- a/crates/tabby-inference/Cargo.toml
+++ b/crates/tabby-inference/Cargo.toml
@@ -16,7 +16,7 @@ derive_builder.workspace = true
 futures = { workspace = true }
 tabby-common = { path = "../tabby-common" }
 trie-rs = "0.1.1"
-async-openai.workspace = true
+async-openai-alt.workspace = true
 secrecy = "0.8"
 reqwest.workspace = true
 tracing.workspace = true
diff --git a/crates/tabby-inference/src/chat.rs b/crates/tabby-inference/src/chat.rs
index 5ef447daaa3a..ff3b2d1672d3 100644
--- a/crates/tabby-inference/src/chat.rs
+++ b/crates/tabby-inference/src/chat.rs
@@ -1,4 +1,4 @@
-use async_openai::{
+use async_openai_alt::{
     config::OpenAIConfig,
     error::OpenAIError,
     types::{
@@ -85,7 +85,7 @@ impl ExtendedOpenAIConfig {
     }
 }
 
-impl async_openai::config::Config for ExtendedOpenAIConfig {
+impl async_openai_alt::config::Config for ExtendedOpenAIConfig {
     fn headers(&self) -> reqwest::header::HeaderMap {
         self.base.headers()
     }
@@ -108,7 +108,7 @@ impl async_openai::config::Config for ExtendedOpenAIConfig {
 }
 
 #[async_trait]
-impl ChatCompletionStream for async_openai::Client<ExtendedOpenAIConfig> {
+impl ChatCompletionStream for async_openai_alt::Client<ExtendedOpenAIConfig> {
     async fn chat(
         &self,
         request: CreateChatCompletionRequest,
diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml
index be26919f1038..0aa663557e92 100644
--- a/crates/tabby/Cargo.toml
+++ b/crates/tabby/Cargo.toml
@@ -59,7 +59,7 @@ axum-prometheus = "0.6"
 uuid.workspace = true
 color-eyre = { version = "0.6.3" }
 reqwest.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 spinners = "4.1.1"
 regex.workspace = true
 
diff --git a/crates/tabby/src/routes/chat.rs b/crates/tabby/src/routes/chat.rs
index d8a1f84d81d6..95ce25fcccbf 100644
--- a/crates/tabby/src/routes/chat.rs
+++ b/crates/tabby/src/routes/chat.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use async_openai::error::OpenAIError;
+use async_openai_alt::error::OpenAIError;
 use axum::{
     extract::State,
     response::sse::{Event, KeepAlive, Sse},
@@ -36,7 +36,7 @@ pub async fn chat_completions_utoipa(_request: Json) -> Statu
 pub async fn chat_completions(
     State(state): State>,
     TypedHeader(MaybeUser(user)): TypedHeader<MaybeUser>,
-    Json(mut request): Json<async_openai::types::CreateChatCompletionRequest>,
+    Json(mut request): Json<async_openai_alt::types::CreateChatCompletionRequest>,
 ) -> Result>>, StatusCode> {
     if let Some(user) = user {
         request.user.replace(user);
diff --git a/ee/tabby-schema/Cargo.toml b/ee/tabby-schema/Cargo.toml
index a6849542d615..b12e6b9e52cd 100644
--- a/ee/tabby-schema/Cargo.toml
+++ b/ee/tabby-schema/Cargo.toml
@@ -10,7 +10,7 @@ schema-language = ["juniper/schema-language"]
 
 [dependencies]
 anyhow.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 async-trait.workspace = true
 axum = { workspace = true }
 base64 = "0.22.0"
diff --git a/ee/tabby-schema/src/schema/mod.rs b/ee/tabby-schema/src/schema/mod.rs
index c6c75e6e954d..6424a9a1f8ad 100644
--- a/ee/tabby-schema/src/schema/mod.rs
+++ b/ee/tabby-schema/src/schema/mod.rs
@@ -20,7 +20,7 @@ pub mod worker;
 use std::{sync::Arc, time::Instant};
 
 use access_policy::{AccessPolicyService, SourceIdAccessPolicy};
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
         ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs,
diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml
index 75afa8fae5bf..e253dc8ce6bd 100644
--- a/ee/tabby-webserver/Cargo.toml
+++ b/ee/tabby-webserver/Cargo.toml
@@ -53,7 +53,7 @@ strum.workspace = true
 cron = "0.12.1"
 async-stream.workspace = true
 logkit.workspace = true
-async-openai.workspace = true
+async-openai-alt.workspace = true
 ratelimit.workspace = true
 cached.workspace = true
 
diff --git a/ee/tabby-webserver/src/service/answer.rs b/ee/tabby-webserver/src/service/answer.rs
index 821b6c5a5a1c..f355f61f93e9 100644
--- a/ee/tabby-webserver/src/service/answer.rs
+++ b/ee/tabby-webserver/src/service/answer.rs
@@ -7,11 +7,14 @@ use std::{
 };
 
 use anyhow::anyhow;
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
+        ChatCompletionRequestAssistantMessage, ChatCompletionRequestAssistantMessageContent,
         ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
-        ChatCompletionRequestUserMessageArgs, CreateChatCompletionRequestArgs, Role,
+        ChatCompletionRequestSystemMessageContent, ChatCompletionRequestUserMessage,
+        ChatCompletionRequestUserMessageArgs, ChatCompletionRequestUserMessageContent,
+        CreateChatCompletionRequestArgs, Role,
     },
 };
 use async_stream::stream;
@@ -438,8 +441,9 @@ fn convert_messages_to_chat_completion_request(
     if !config.system_prompt.is_empty() {
         output.push(ChatCompletionRequestMessage::System(
             ChatCompletionRequestSystemMessage {
-                content: config.system_prompt.clone(),
-                role: Role::System,
+                content: ChatCompletionRequestSystemMessageContent::Text(
+                    config.system_prompt.clone(),
+                ),
                 name: None,
             },
         ));
@@ -452,36 +456,42 @@ fn convert_messages_to_chat_completion_request(
             thread::Role::User => Role::User,
         };
 
-        let content = if role == Role::User {
+        let message: ChatCompletionRequestMessage = if role == Role::User {
             if i % 2 != 0 {
                 bail!("User message must be followed by assistant message");
             }
 
             let y = &messages[i + 1];
 
-            build_user_prompt(&x.content, &y.attachment, None)
+            let content = build_user_prompt(&x.content, &y.attachment, None);
+            ChatCompletionRequestMessage::User(ChatCompletionRequestUserMessage {
+                content: ChatCompletionRequestUserMessageContent::Text(
+                    helper.rewrite_tag(&content),
+                ),
+                ..Default::default()
+            })
         } else {
-            x.content.clone()
+            ChatCompletionRequestMessage::Assistant(ChatCompletionRequestAssistantMessage {
+                content: Some(ChatCompletionRequestAssistantMessageContent::Text(
+                    x.content.clone(),
+                )),
+                ..Default::default()
+            })
         };
 
-        output.push(ChatCompletionRequestMessage::System(
-            ChatCompletionRequestSystemMessage {
-                content: helper.rewrite_tag(&content),
-                role,
-                name: None,
-            },
-        ));
+        output.push(message);
     }
 
-    output.push(ChatCompletionRequestMessage::System(
-        ChatCompletionRequestSystemMessage {
-            content: helper.rewrite_tag(&build_user_prompt(
-                &messages[messages.len() - 1].content,
-                attachment,
-                user_attachment_input,
+    output.push(ChatCompletionRequestMessage::User(
+        ChatCompletionRequestUserMessage {
+            content: ChatCompletionRequestUserMessageContent::Text(helper.rewrite_tag(
+                &build_user_prompt(
+                    &messages[messages.len() - 1].content,
+                    attachment,
+                    user_attachment_input,
+                ),
             )),
-            role: Role::User,
-            name: None,
+            ..Default::default()
         },
     ));
 
diff --git a/ee/tabby-webserver/src/service/answer/testutils/mod.rs b/ee/tabby-webserver/src/service/answer/testutils/mod.rs
index a189f6cecb7b..1ab247a49398 100644
--- a/ee/tabby-webserver/src/service/answer/testutils/mod.rs
+++ b/ee/tabby-webserver/src/service/answer/testutils/mod.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;
 
-use async_openai::{
+use async_openai_alt::{
     error::OpenAIError,
     types::{
         ChatChoice, ChatChoiceStream, ChatCompletionResponseMessage, ChatCompletionResponseStream,
@@ -44,7 +44,7 @@ impl ChatCompletionStream for FakeChatCompletionStream {
         _request: CreateChatCompletionRequest,
     ) -> Result<CreateChatCompletionResponse, OpenAIError> {
         if self.return_error {
-            return Err(OpenAIError::ApiError(async_openai::error::ApiError {
+            return Err(OpenAIError::ApiError(async_openai_alt::error::ApiError {
                 message: "error".to_string(),
                 code: None,
                 param: None,
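
The substance of this migration, visible in the answer.rs hunk above, is that async-openai-alt 0.26 no longer models a request message as a plain String content plus an explicit role field: the role is now carried by the ChatCompletionRequestMessage enum variant, and the content by per-role content enums such as ChatCompletionRequestSystemMessageContent. A minimal sketch of building a request against these 0.26.1 types follows; the model name and prompt strings are placeholders for illustration, not values taken from this patch.

    use async_openai_alt::error::OpenAIError;
    use async_openai_alt::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
        ChatCompletionRequestSystemMessageContent, ChatCompletionRequestUserMessage,
        ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
        CreateChatCompletionRequestArgs,
    };

    fn build_request() -> Result<CreateChatCompletionRequest, OpenAIError> {
        let messages = vec![
            // The System variant replaces the old `role: Role::System` field;
            // content is a typed enum rather than a String.
            ChatCompletionRequestMessage::System(ChatCompletionRequestSystemMessage {
                content: ChatCompletionRequestSystemMessageContent::Text(
                    "You are a helpful coding assistant.".into(),
                ),
                name: None,
            }),
            // User messages follow the same pattern; remaining fields keep their defaults.
            ChatCompletionRequestMessage::User(ChatCompletionRequestUserMessage {
                content: ChatCompletionRequestUserMessageContent::Text("Hello!".into()),
                ..Default::default()
            }),
        ];

        CreateChatCompletionRequestArgs::default()
            .model("placeholder-model") // placeholder, not from this patch
            .messages(messages)
            .build()
    }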
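
The Cargo.lock hunk also shows async-openai-alt 0.26.1 pulling in eventsource-stream, which backs its SSE-based streaming responses. Below is a sketch, under the assumption that async-openai-alt keeps upstream async-openai's client surface (Client::with_config, chat().create, chat().create_stream), of calling the swapped-in client directly; the endpoint and API key are illustrative only, whereas Tabby itself builds the client from HttpModelConfig via ExtendedOpenAIConfig as in crates/http-api-bindings/src/chat/mod.rs.

    use async_openai_alt::{config::OpenAIConfig, error::OpenAIError, Client};
    use async_openai_alt::types::{CreateChatCompletionRequest, CreateChatCompletionResponse};

    async fn chat_once(
        request: CreateChatCompletionRequest,
    ) -> Result<CreateChatCompletionResponse, OpenAIError> {
        // Illustrative endpoint and key; not taken from this patch.
        let config = OpenAIConfig::new()
            .with_api_base("http://localhost:8080/v1")
            .with_api_key("not-a-real-key");
        let client = Client::with_config(config);

        // Non-streaming completion. `client.chat().create_stream(request)` would instead
        // return a ChatCompletionResponseStream fed by SSE, which is why the new crate
        // depends on eventsource-stream.
        client.chat().create(request).await
    }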