Skip to content

Commit 7f8ed0d

Browse files
authored
Merge pull request #311 from janhq/j/update-engine-interface
feat: update engine interface to allow third parties to provide engines
2 parents b4aa6ab + 7332878 commit 7f8ed0d

File tree

4 files changed

+117
-4
lines changed

4 files changed

+117
-4
lines changed

CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ project(cortex.llamacpp)
33
SET(TARGET engine)
44

55
if(UNIX AND NOT APPLE)
6+
add_compile_definitions(LINUX)
67
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread")
78
add_compile_options(-fPIC -pthread)
89
find_package(Threads)
@@ -53,4 +54,4 @@ target_include_directories(${TARGET} PRIVATE
5354
${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp
5455
${THIRD_PARTY_PATH}/include)
5556

56-
target_compile_features(${TARGET} PUBLIC cxx_std_17)
57+
target_compile_features(${TARGET} PUBLIC cxx_std_17)

base/cortex-common/enginei.h

+32-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#pragma once
22

3+
#include <filesystem>
34
#include <functional>
45
#include <memory>
6+
#include <vector>
57

68
#include "json/value.h"
79
#include "trantor/utils/Logger.h"
@@ -10,8 +12,37 @@
1012
// Note: only append new function to keep the compatibility.
1113
class EngineI {
1214
public:
15+
struct RegisterLibraryOption {
16+
std::vector<std::filesystem::path> paths;
17+
};
18+
19+
struct EngineLoadOption {
20+
// engine
21+
std::filesystem::path engine_path;
22+
std::filesystem::path cuda_path;
23+
bool custom_engine_path;
24+
25+
// logging
26+
std::filesystem::path log_path;
27+
int max_log_lines;
28+
trantor::Logger::LogLevel log_level;
29+
};
30+
31+
struct EngineUnloadOption {
32+
bool unload_dll;
33+
};
34+
1335
virtual ~EngineI() {}
1436

37+
/**
38+
* Being called before starting process to register dependencies search paths.
39+
*/
40+
virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0;
41+
42+
virtual void Load(EngineLoadOption opts) = 0;
43+
44+
virtual void Unload(EngineUnloadOption opts) = 0;
45+
1546
virtual void HandleChatCompletion(
1647
std::shared_ptr<Json::Value> json_body,
1748
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
@@ -46,4 +77,4 @@ class EngineI {
4677
virtual void SetFileLogger(int max_log_lines,
4778
const std::string& log_path) = 0;
4879
virtual void SetLogLevel(trantor::Logger::LogLevel log_level) = 0;
49-
};
80+
};

src/llama_engine.cc

+68-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ std::shared_ptr<InferenceState> CreateInferenceState(LlamaServerContext& l) {
9191
}
9292

9393
Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,
94-
9594
int index, bool is_base64) {
9695
Json::Value dataItem;
9796
dataItem["object"] = "embedding";
@@ -114,6 +113,7 @@ Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,
114113

115114
return dataItem;
116115
}
116+
117117
std::vector<int> getUTF8Bytes(const std::string& str) {
118118
std::vector<int> bytes;
119119
for (unsigned char c : str) {
@@ -271,6 +271,71 @@ std::string CreateReturnJson(const std::string& id, const std::string& model,
271271
}
272272
} // namespace
273273

274+
void LlamaEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
275+
#if defined(LINUX)
276+
const char* name = "LD_LIBRARY_PATH";
277+
std::string v;
278+
if (auto g = getenv(name); g) {
279+
v += g;
280+
}
281+
LOG_DEBUG << "LD_LIBRARY_PATH before: " << v;
282+
283+
for (const auto& p : opts.paths) {
284+
v += p.string() + ":" + v;
285+
}
286+
287+
setenv(name, v.c_str(), true);
288+
LOG_DEBUG << "LD_LIBRARY_PATH after: " << getenv(name);
289+
#endif
290+
}
291+
292+
void LlamaEngine::Load(EngineLoadOption opts) {
293+
LOG_INFO << "Loading engine..";
294+
295+
LOG_DEBUG << "Use custom engine path: " << opts.custom_engine_path;
296+
LOG_DEBUG << "Engine path: " << opts.engine_path.string();
297+
298+
SetFileLogger(opts.max_log_lines, opts.log_path.string());
299+
SetLogLevel(opts.log_level);
300+
301+
#if defined(_WIN32)
302+
if (!opts.custom_engine_path) {
303+
if (auto cookie = AddDllDirectory(opts.engine_path.c_str()); cookie != 0) {
304+
LOG_INFO << "Added dll directory: " << opts.engine_path.string();
305+
cookies_.push_back(cookie);
306+
} else {
307+
LOG_WARN << "Could not add dll directory: " << opts.engine_path.string();
308+
}
309+
310+
if (auto cuda_cookie = AddDllDirectory(opts.cuda_path.c_str());
311+
cuda_cookie != 0) {
312+
LOG_INFO << "Added cuda dll directory: " << opts.cuda_path.string();
313+
cookies_.push_back(cuda_cookie);
314+
} else {
315+
LOG_WARN << "Could not add cuda dll directory: "
316+
<< opts.cuda_path.string();
317+
}
318+
}
319+
#endif
320+
LOG_INFO << "Engine loaded successfully";
321+
}
322+
323+
void LlamaEngine::Unload(EngineUnloadOption opts) {
324+
LOG_INFO << "Unloading engine..";
325+
LOG_DEBUG << "Unload dll: " << opts.unload_dll;
326+
327+
if (opts.unload_dll) {
328+
#if defined(_WIN32)
329+
for (const auto& cookie : cookies_) {
330+
if (!RemoveDllDirectory(cookie)) {
331+
LOG_WARN << "Could not remove dll directory";
332+
}
333+
}
334+
#endif
335+
}
336+
LOG_INFO << "Engine unloaded successfully";
337+
}
338+
274339
LlamaEngine::LlamaEngine(int log_option) {
275340
trantor::Logger::setLogLevel(trantor::Logger::kInfo);
276341
if (log_option == kFileLoggerOption) {
@@ -303,6 +368,8 @@ LlamaEngine::~LlamaEngine() {
303368
}
304369
server_map_.clear();
305370
async_file_logger_.reset();
371+
372+
LOG_INFO << "LlamaEngine destructed successfully";
306373
}
307374

308375
void LlamaEngine::HandleChatCompletion(

src/llama_engine.h

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#pragma once
2+
23
#include <trantor/utils/AsyncFileLogger.h>
34
#include "chat_completion_request.h"
45
#include "cortex-common/enginei.h"
@@ -10,9 +11,18 @@
1011

1112
class LlamaEngine : public EngineI {
1213
public:
14+
constexpr static auto kEngineName = "cortex.llamacpp";
15+
1316
LlamaEngine(int log_option = 0);
1417
~LlamaEngine() final;
18+
1519
// #### Interface ####
20+
void RegisterLibraryPath(RegisterLibraryOption opts) final;
21+
22+
void Load(EngineLoadOption opts) final;
23+
24+
void Unload(EngineUnloadOption opts) final;
25+
1626
void HandleChatCompletion(
1727
std::shared_ptr<Json::Value> jsonBody,
1828
std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;
@@ -74,4 +84,8 @@ class LlamaEngine : public EngineI {
7484

7585
bool print_version_ = true;
7686
std::unique_ptr<trantor::FileLogger> async_file_logger_;
77-
};
87+
88+
#if defined(_WIN32)
89+
std::vector<DLL_DIRECTORY_COOKIE> cookies_;
90+
#endif
91+
};

0 commit comments

Comments
 (0)