feat: update engine interface to allow 3rd party to provide engine #311

Merged · 2 commits · Dec 3, 2024
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -3,6 +3,7 @@ project(cortex.llamacpp)
SET(TARGET engine)

if(UNIX AND NOT APPLE)
add_compile_definitions(LINUX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread")
add_compile_options(-fPIC -pthread)
find_package(Threads)
@@ -53,4 +54,4 @@ target_include_directories(${TARGET} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp
${THIRD_PARTY_PATH}/include)

target_compile_features(${TARGET} PUBLIC cxx_std_17)
target_compile_features(${TARGET} PUBLIC cxx_std_17)
2 changes: 1 addition & 1 deletion README.md
@@ -44,7 +44,7 @@ If you don't have git, you can download the source code as a file archive from [
- **On MacOS with Apple Silicon:**

```bash
make build-example-server CMAKE_EXTRA_FLAGS="-DGGML_METAL_EMBED_LIBRARY=ON"
make build-example-server CMAKE_EXTRA_FLAGS="-DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_METAL_EMBED_LIBRARY=ON"
```

- **On MacOS with Intel processors:**
33 changes: 32 additions & 1 deletion base/cortex-common/enginei.h
@@ -1,7 +1,9 @@
#pragma once

#include <filesystem>
#include <functional>
#include <memory>
#include <vector>

#include "json/value.h"
#include "trantor/utils/Logger.h"
@@ -10,8 +12,37 @@
// Note: only append new functions to keep compatibility.
class EngineI {
public:
struct RegisterLibraryOption {
std::vector<std::filesystem::path> paths;
};

struct EngineLoadOption {
// engine
std::filesystem::path engine_path;
std::filesystem::path cuda_path;
bool custom_engine_path;

// logging
std::filesystem::path log_path;
int max_log_lines;
trantor::Logger::LogLevel log_level;
};

struct EngineUnloadOption {
bool unload_dll;
};

virtual ~EngineI() {}

/**
* Called before starting the process to register dependency search paths.
*/
virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0;

virtual void Load(EngineLoadOption opts) = 0;

virtual void Unload(EngineUnloadOption opts) = 0;

virtual void HandleChatCompletion(
std::shared_ptr<Json::Value> json_body,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
@@ -46,4 +77,4 @@ class EngineI {
virtual void SetFileLogger(int max_log_lines,
const std::string& log_path) = 0;
virtual void SetLogLevel(trantor::Logger::LogLevel log_level) = 0;
};
};
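
To make the new lifecycle concrete, here is a minimal sketch of how a host might drive these hooks end to end. The `RunEngineLifecycle` helper and every path and value in it are illustrative assumptions, not part of this PR; the PR itself only defines the interface:

```cpp
// Sketch: a host driving the extended EngineI lifecycle.
// All names and values below are illustrative assumptions.
#include <filesystem>
#include "cortex-common/enginei.h"

void RunEngineLifecycle(EngineI* engine,
                        const std::filesystem::path& engine_dir) {
  // 1. Register dependency search paths before loading any
  //    native libraries.
  EngineI::RegisterLibraryOption reg_opts;
  reg_opts.paths.push_back(engine_dir);
  engine->RegisterLibraryPath(reg_opts);

  // 2. Load the engine, with logging configured by the host.
  EngineI::EngineLoadOption load_opts;
  load_opts.engine_path = engine_dir;
  load_opts.cuda_path = engine_dir / "cuda";
  load_opts.custom_engine_path = false;
  load_opts.log_path = engine_dir / "engine.log";
  load_opts.max_log_lines = 100000;
  load_opts.log_level = trantor::Logger::kInfo;
  engine->Load(load_opts);

  // ... serve requests via HandleChatCompletion(), etc. ...

  // 3. Unload, optionally releasing the registered DLL directories.
  EngineI::EngineUnloadOption unload_opts;
  unload_opts.unload_dll = true;
  engine->Unload(unload_opts);
}
```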
69 changes: 68 additions & 1 deletion src/llama_engine.cc
@@ -88,7 +88,6 @@ std::shared_ptr<InferenceState> CreateInferenceState(LlamaServerContext& l) {
}

Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,

int index, bool is_base64) {
Json::Value dataItem;
dataItem["object"] = "embedding";
@@ -111,6 +110,7 @@ Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,

return dataItem;
}

std::vector<int> getUTF8Bytes(const std::string& str) {
std::vector<int> bytes;
for (unsigned char c : str) {
@@ -268,6 +268,71 @@ std::string CreateReturnJson(const std::string& id, const std::string& model,
}
} // namespace

void LlamaEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
#if defined(LINUX)
const char* name = "LD_LIBRARY_PATH";
std::string v;
if (auto g = getenv(name); g) {
v += g;
}
LOG_DEBUG << "LD_LIBRARY_PATH before: " << v;

for (const auto& p : opts.paths) {
v = p.string() + ":" + v;  // prepend so registered paths take precedence
}

setenv(name, v.c_str(), true);
LOG_DEBUG << "LD_LIBRARY_PATH after: " << getenv(name);
#endif
}
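
As a standalone illustration of the prepend semantics above (the paths here are hypothetical; `setenv` is POSIX-only, matching the `LINUX` guard):

```cpp
// Standalone illustration of the LD_LIBRARY_PATH prepending done in
// RegisterLibraryPath; the paths are hypothetical.
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

int main() {
  setenv("LD_LIBRARY_PATH", "/usr/lib", /*overwrite=*/1);
  const std::vector<std::string> extra = {"/opt/engine/lib",
                                          "/opt/cuda/lib64"};

  std::string v = std::getenv("LD_LIBRARY_PATH");
  for (const auto& p : extra) {
    v = p + ":" + v;  // later registrations end up first
  }
  setenv("LD_LIBRARY_PATH", v.c_str(), 1);

  // Prints: /opt/cuda/lib64:/opt/engine/lib:/usr/lib
  std::cout << std::getenv("LD_LIBRARY_PATH") << "\n";
  return 0;
}
```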

void LlamaEngine::Load(EngineLoadOption opts) {
LOG_INFO << "Loading engine..";

LOG_DEBUG << "Use custom engine path: " << opts.custom_engine_path;
LOG_DEBUG << "Engine path: " << opts.engine_path.string();

SetFileLogger(opts.max_log_lines, opts.log_path.string());
SetLogLevel(opts.log_level);

#if defined(_WIN32)
if (!opts.custom_engine_path) {
if (auto cookie = AddDllDirectory(opts.engine_path.c_str()); cookie != 0) {
LOG_INFO << "Added dll directory: " << opts.engine_path.string();
cookies_.push_back(cookie);
} else {
LOG_WARN << "Could not add dll directory: " << opts.engine_path.string();
}

if (auto cuda_cookie = AddDllDirectory(opts.cuda_path.c_str());
cuda_cookie != 0) {
LOG_INFO << "Added cuda dll directory: " << opts.cuda_path.string();
cookies_.push_back(cuda_cookie);
} else {
LOG_WARN << "Could not add cuda dll directory: "
<< opts.cuda_path.string();
}
}
#endif
LOG_INFO << "Engine loaded successfully";
}

void LlamaEngine::Unload(EngineUnloadOption opts) {
LOG_INFO << "Unloading engine..";
LOG_DEBUG << "Unload dll: " << opts.unload_dll;

if (opts.unload_dll) {
#if defined(_WIN32)
for (const auto& cookie : cookies_) {
if (!RemoveDllDirectory(cookie)) {
LOG_WARN << "Could not remove dll directory";
}
}
#endif
}
LOG_INFO << "Engine unloaded successfully";
}
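
On Windows, `AddDllDirectory` returns an opaque cookie that must later be handed back to `RemoveDllDirectory`, which is why `Load` stashes the cookies for `Unload`. A minimal standalone sketch of that pairing (the directory path is hypothetical):

```cpp
// Minimal Windows-only sketch of the AddDllDirectory /
// RemoveDllDirectory cookie pairing; the path is hypothetical.
#include <windows.h>
#include <iostream>
#include <vector>

int main() {
  std::vector<DLL_DIRECTORY_COOKIE> cookies;

  if (auto cookie = AddDllDirectory(L"C:\\engines\\cortex.llamacpp");
      cookie != 0) {
    cookies.push_back(cookie);  // keep it so we can remove it later
  } else {
    std::cerr << "AddDllDirectory failed\n";
  }

  // DLLs loaded with LoadLibraryEx(..., LOAD_LIBRARY_SEARCH_DEFAULT_DIRS)
  // now resolve dependencies from the registered directory.

  for (const auto& cookie : cookies) {
    if (!RemoveDllDirectory(cookie)) {
      std::cerr << "RemoveDllDirectory failed\n";
    }
  }
  return 0;
}
```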

LlamaEngine::LlamaEngine(int log_option) {
trantor::Logger::setLogLevel(trantor::Logger::kInfo);
if (log_option == kFileLoggerOption) {
@@ -300,6 +365,8 @@ LlamaEngine::~LlamaEngine() {
}
server_map_.clear();
async_file_logger_.reset();

LOG_INFO << "LlamaEngine destructed successfully";
}

void LlamaEngine::HandleChatCompletion(
16 changes: 15 additions & 1 deletion src/llama_engine.h
@@ -1,4 +1,5 @@
#pragma once

#include <trantor/utils/AsyncFileLogger.h>
#include "chat_completion_request.h"
#include "cortex-common/enginei.h"
@@ -10,9 +11,18 @@

class LlamaEngine : public EngineI {
public:
constexpr static auto kEngineName = "cortex.llamacpp";

LlamaEngine(int log_option = 0);
~LlamaEngine() final;

// #### Interface ####
void RegisterLibraryPath(RegisterLibraryOption opts) final;

void Load(EngineLoadOption opts) final;

void Unload(EngineUnloadOption opts) final;

void HandleChatCompletion(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;
@@ -74,4 +84,8 @@ class LlamaEngine : public EngineI {

bool print_version_ = true;
std::unique_ptr<trantor::FileLogger> async_file_logger_;
};

#if defined(_WIN32)
std::vector<DLL_DIRECTORY_COOKIE> cookies_;
#endif
};
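
Since the point of the PR is letting a third party ship the engine as a plugin, a host presumably resolves the `EngineI` instance through some C entry point after loading the library. A hypothetical sketch of such an export; the symbol names `get_engine`/`destroy_engine` are assumptions, not defined anywhere in this PR:

```cpp
// Hypothetical plugin entry points for an EngineI implementation;
// the exported symbol names are assumptions, not part of this PR.
#include "llama_engine.h"

#if defined(_WIN32)
#define ENGINE_API extern "C" __declspec(dllexport)
#else
#define ENGINE_API extern "C" __attribute__((visibility("default")))
#endif

ENGINE_API EngineI* get_engine() {
  return new LlamaEngine();
}

ENGINE_API void destroy_engine(EngineI* engine) {
  delete engine;
}
```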