
Commit

update docs
namchuai committed Dec 3, 2024
1 parent bfff12a commit 7332878
Showing 5 changed files with 79 additions and 22 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -3,6 +3,7 @@ project(cortex.llamacpp)
SET(TARGET engine)

if(UNIX AND NOT APPLE)
add_compile_definitions(LINUX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread")
add_compile_options(-fPIC -pthread)
find_package(Threads)
@@ -53,4 +54,4 @@ target_include_directories(${TARGET} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp
${THIRD_PARTY_PATH}/include)

target_compile_features(${TARGET} PUBLIC cxx_std_17)
target_compile_features(${TARGET} PUBLIC cxx_std_17)
2 changes: 1 addition & 1 deletion README.md
@@ -44,7 +44,7 @@ If you don't have git, you can download the source code as a file archive from [
- **On MacOS with Apple Silicon:**

```bash
make build-example-server CMAKE_EXTRA_FLAGS="-DGGML_METAL_EMBED_LIBRARY=ON"
make build-example-server CMAKE_EXTRA_FLAGS="-DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_METAL_EMBED_LIBRARY=ON"
```

- **On MacOS with Intel processors:**
24 changes: 23 additions & 1 deletion base/cortex-common/enginei.h
@@ -3,6 +3,7 @@
#include <filesystem>
#include <functional>
#include <memory>
#include <vector>

#include "json/value.h"
#include "trantor/utils/Logger.h"
@@ -11,16 +12,37 @@
// Note: only append new functions to keep backward compatibility.
class EngineI {
public:
struct RegisterLibraryOption {
std::vector<std::filesystem::path> paths;
};

struct EngineLoadOption {
// engine
std::filesystem::path engine_path;
std::filesystem::path cuda_path; // TODO: make this more generic
std::filesystem::path cuda_path;
bool custom_engine_path;

// logging
std::filesystem::path log_path;
int max_log_lines;
trantor::Logger::LogLevel log_level;
};

struct EngineUnloadOption {
bool unload_dll;
};

virtual ~EngineI() {}

/**
* Called before starting the process, to register dependency search paths.
*/
virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0;

virtual void Load(EngineLoadOption opts) = 0;

virtual void Unload(EngineUnloadOption opts) = 0;

virtual void HandleChatCompletion(
std::shared_ptr<Json::Value> json_body,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
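For context, here is a minimal sketch of how a host application might drive the lifecycle added to `EngineI` above: register dependency search paths first, then `Load`, serve requests, and finally `Unload`. The `LoadEngineLibrary()` helper and every concrete path below are hypothetical placeholders; only the `EngineI` members shown in this diff come from the change itself.

```cpp
// Hypothetical host-side usage of the extended EngineI interface.
// LoadEngineLibrary() and all paths are illustrative, not part of this commit.
#include <filesystem>
#include <memory>

#include "cortex-common/enginei.h"

std::unique_ptr<EngineI> LoadEngineLibrary();  // assumed dlopen/LoadLibrary wrapper

int main() {
  auto engine = LoadEngineLibrary();

  // 1. Register dependency search paths (e.g. bundled CUDA libraries).
  EngineI::RegisterLibraryOption reg_opts;
  reg_opts.paths = {"/opt/cortex/engines/cortex.llamacpp",
                    "/opt/cortex/deps/cuda"};
  engine->RegisterLibraryPath(reg_opts);

  // 2. Load the engine with host-configured logging.
  EngineI::EngineLoadOption load_opts;
  load_opts.engine_path = "/opt/cortex/engines/cortex.llamacpp";
  load_opts.cuda_path = "/opt/cortex/deps/cuda";
  load_opts.custom_engine_path = false;
  load_opts.log_path = "/var/log/cortex/engine.log";
  load_opts.max_log_lines = 100000;
  load_opts.log_level = trantor::Logger::kInfo;
  engine->Load(load_opts);

  // ... HandleChatCompletion() and other requests ...

  // 3. Unload; on Windows this can also drop the added DLL search directories.
  EngineI::EngineUnloadOption unload_opts;
  unload_opts.unload_dll = true;
  engine->Unload(unload_opts);
  return 0;
}
```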
66 changes: 48 additions & 18 deletions src/llama_engine.cc
@@ -88,7 +88,6 @@ std::shared_ptr<InferenceState> CreateInferenceState(LlamaServerContext& l) {
}

Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,

int index, bool is_base64) {
Json::Value dataItem;
dataItem["object"] = "embedding";
@@ -111,6 +110,7 @@ Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,

return dataItem;
}

std::vector<int> getUTF8Bytes(const std::string& str) {
std::vector<int> bytes;
for (unsigned char c : str) {
@@ -268,33 +268,71 @@ std::string CreateReturnJson(const std::string& id, const std::string& model,
}
} // namespace

void LlamaEngine::Load(EngineLoadOption opts) final {
void LlamaEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
#if defined(LINUX)
const char* name = "LD_LIBRARY_PATH";
std::string v;
if (auto g = getenv(name); g) {
v += g;
}
LOG_DEBUG << "LD_LIBRARY_PATH before: " << v;

for (const auto& p : opts.paths) {
v = p.string() + ":" + v;  // prepend so the registered paths take precedence
}

setenv(name, v.c_str(), true);
LOG_DEBUG << "LD_LIBRARY_PATH after: " << getenv(name);
#endif
}

void LlamaEngine::Load(EngineLoadOption opts) {
LOG_INFO << "Loading engine..";

LOG_DEBUG << "Use custom engine path: " << opts.custom_engine_path;
LOG_DEBUG << "Engine path: " << opts.engine_path;
LOG_DEBUG << "Engine path: " << opts.engine_path.string();

SetFileLogger(opts.max_log_lines, opts.log_path.string());
SetLogLevel(opts.log_level);

#if defined(_WIN32)
if (!opts.custom_engine_path) {
if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
LOG_INFO << "Added dll directory: " << p.string();
if (auto cookie = AddDllDirectory(opts.engine_path.c_str()); cookie != 0) {
LOG_INFO << "Added dll directory: " << opts.engine_path.string();
cookies_.push_back(cookie);
} else {
LOG_WARN << "Could not add dll directory: " << p.string();
LOG_WARN << "Could not add dll directory: " << opts.engine_path.string();
}

if (auto cuda_cookie = AddDllDirectory(opts.cuda_path.c_str());
cuda_cookie != 0) {
LOG_INFO << "Added cuda dll directory: " << p.string();
cookies_.push_back(cookie);
LOG_INFO << "Added cuda dll directory: " << opts.cuda_path.string();
cookies_.push_back(cuda_cookie);
} else {
LOG_WARN << "Could not add cuda dll directory: " << p.string();
LOG_WARN << "Could not add cuda dll directory: "
<< opts.cuda_path.string();
}
}
#endif
LOG_INFO << "Engine loaded successfully";
}

void LlamaEngine::Unload(EngineUnloadOption opts) {
LOG_INFO << "Unloading engine..";
LOG_DEBUG << "Unload dll: " << opts.unload_dll;

if (opts.unload_dll) {
#if defined(_WIN32)
for (const auto& cookie : cookies_) {
if (!RemoveDllDirectory(cookie)) {
LOG_WARN << "Could not remove dll directory";
}
}
#endif
}
LOG_INFO << "Engine unloaded successfully";
}

LlamaEngine::LlamaEngine(int log_option) {
trantor::Logger::setLogLevel(trantor::Logger::kInfo);
if (log_option == kFileLoggerOption) {
Expand All @@ -321,22 +359,14 @@ LlamaEngine::LlamaEngine(int log_option) {
}

LlamaEngine::~LlamaEngine() {
LOG_INFO << "Unloading engine..";
for (auto& [_, si] : server_map_) {
auto& l = si.ctx;
l.ReleaseResources();
}
server_map_.clear();
async_file_logger_.reset();

#if defined(_WIN32)
for (const auto& cookie : cookies_) {
if (!RemoveDllDirectory(cookie)) {
LOG_WARN << "Could not remove dll directory";
}
}
#endif
LOG_INFO << "Engine unloaded successfully";
LOG_INFO << "LlamaEngine destructed successfully";
}

void LlamaEngine::HandleChatCompletion(
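As a side note, here is a tiny standalone sketch of the prepend behaviour `RegisterLibraryPath` relies on under Linux: each supplied path is pushed onto the front of the existing `LD_LIBRARY_PATH` value, so bundled libraries are found before system ones. The helper below is illustrative only and mirrors the corrected loop above.

```cpp
// Illustrative only: mirrors the LD_LIBRARY_PATH prepend used in RegisterLibraryPath.
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

std::string PrependSearchPaths(const std::vector<std::filesystem::path>& paths,
                               std::string value) {
  for (const auto& p : paths) {
    value = p.string() + ":" + value;  // later entries end up first
  }
  return value;
}

int main() {
  const std::string before = "/usr/local/lib";  // pretend this is LD_LIBRARY_PATH
  const auto after = PrependSearchPaths(
      {"/opt/cortex/engines/cortex.llamacpp", "/opt/cortex/deps/cuda"}, before);
  // Prints: /opt/cortex/deps/cuda:/opt/cortex/engines/cortex.llamacpp:/usr/local/lib
  std::cout << after << "\n";
  return 0;
}
```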
6 changes: 5 additions & 1 deletion src/llama_engine.h
@@ -11,14 +11,18 @@

class LlamaEngine : public EngineI {
public:
constexpr auto kEngineName = "cortex.llamacpp";
constexpr static auto kEngineName = "cortex.llamacpp";

LlamaEngine(int log_option = 0);
~LlamaEngine() final;

// #### Interface ####
void RegisterLibraryPath(RegisterLibraryOption opts) final;

void Load(EngineLoadOption opts) final;

void Unload(EngineUnloadOption opts) final;

void HandleChatCompletion(
std::shared_ptr<Json::Value> jsonBody,
std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;
