diff --git a/CMakeLists.txt b/CMakeLists.txt
index 79e7eed..013fe7f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ project(cortex.llamacpp)
 SET(TARGET engine)
 
 if(UNIX AND NOT APPLE)
+  add_compile_definitions(LINUX)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -pthread")
   add_compile_options(-fPIC -pthread)
   find_package(Threads)
@@ -53,4 +54,4 @@ target_include_directories(${TARGET} PRIVATE
                            ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp
                            ${THIRD_PARTY_PATH}/include)
 
-target_compile_features(${TARGET} PUBLIC cxx_std_17)
\ No newline at end of file
+target_compile_features(${TARGET} PUBLIC cxx_std_17)
diff --git a/README.md b/README.md
index 9dba467..a3491bf 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ If you don't have git, you can download the source code as a file archive from [
 - **On MacOS with Apple Silicon:**
 
   ```bash
-  make build-example-server CMAKE_EXTRA_FLAGS="-DGGML_METAL_EMBED_LIBRARY=ON"
+  make build-example-server CMAKE_EXTRA_FLAGS="-DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_COMMON=ON -DGGML_METAL_EMBED_LIBRARY=ON"
   ```
 
 - **On MacOS with Intel processors:**
diff --git a/base/cortex-common/enginei.h b/base/cortex-common/enginei.h
index a19ab7d..2c7a918 100644
--- a/base/cortex-common/enginei.h
+++ b/base/cortex-common/enginei.h
@@ -3,6 +3,7 @@
 #include <filesystem>
 #include <functional>
 #include <memory>
+#include <vector>
 
 #include "json/value.h"
 #include "trantor/utils/Logger.h"
@@ -11,16 +12,37 @@
 // Note: only append new function to keep the compatibility.
 class EngineI {
  public:
+  struct RegisterLibraryOption {
+    std::vector<std::filesystem::path> paths;
+  };
+
+  struct EngineLoadOption {
+    // engine
     std::filesystem::path engine_path;
-    std::filesystem::path cuda_path;  // TODO: make this more generic
+    std::filesystem::path cuda_path;
     bool custom_engine_path;
+
+    // logging
+    std::filesystem::path log_path;
+    int max_log_lines;
+    trantor::Logger::LogLevel log_level;
+  };
+
+  struct EngineUnloadOption {
+    bool unload_dll;
   };
 
   virtual ~EngineI() {}
 
+  /**
+   * Called before starting the process, to register dependency search paths.
+   */
+  virtual void RegisterLibraryPath(RegisterLibraryOption opts) = 0;
+
+  virtual void Load(EngineLoadOption opts) = 0;
+
+  virtual void Unload(EngineUnloadOption opts) = 0;
+
   virtual void HandleChatCompletion(
       std::shared_ptr<Json::Value> json_body,
       std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index 96863fc..800cb7c 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -88,7 +88,6 @@ std::shared_ptr<InferenceState> CreateInferenceState(LlamaServerContext& l) {
 }
 
 Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,
-                                   int index,
                                    bool is_base64) {
   Json::Value dataItem;
   dataItem["object"] = "embedding";
@@ -111,6 +110,7 @@ Json::Value CreateEmbeddingPayload(const std::vector<float>& embedding,
 
   return dataItem;
 }
+
 std::vector<int> getUTF8Bytes(const std::string& str) {
   std::vector<int> bytes;
   for (unsigned char c : str) {
@@ -268,33 +268,71 @@ std::string CreateReturnJson(const std::string& id, const std::string& model,
 }
 }  // namespace
 
-void LlamaEngine::Load(EngineLoadOption opts) final {
+void LlamaEngine::RegisterLibraryPath(RegisterLibraryOption opts) {
+#if defined(LINUX)
+  const char* name = "LD_LIBRARY_PATH";
+  std::string v;
+  if (auto g = getenv(name); g) {
+    v += g;
+  }
+  LOG_DEBUG << "LD_LIBRARY_PATH before: " << v;
+
+  for (const auto& p : opts.paths) {
+    v = p.string() + ":" + v;
+  }
+
+  setenv(name, v.c_str(), true);
+  LOG_DEBUG << "LD_LIBRARY_PATH after: " << getenv(name);
+#endif
+}
+
+void LlamaEngine::Load(EngineLoadOption opts) {
   LOG_INFO << "Loading engine..";
 
   LOG_DEBUG << "Use custom engine path: " << opts.custom_engine_path;
-  LOG_DEBUG << "Engine path: " << opts.engine_path;
+  LOG_DEBUG << "Engine path: " << opts.engine_path.string();
+
+  SetFileLogger(opts.max_log_lines, opts.log_path.string());
+  SetLogLevel(opts.log_level);
 
 #if defined(_WIN32)
   if (!opts.custom_engine_path) {
-    if (auto cookie = AddDllDirectory(p.c_str()); cookie != 0) {
-      LOG_INFO << "Added dll directory: " << p.string();
+    if (auto cookie = AddDllDirectory(opts.engine_path.c_str()); cookie != 0) {
+      LOG_INFO << "Added dll directory: " << opts.engine_path.string();
       cookies_.push_back(cookie);
     } else {
-      LOG_WARN << "Could not add dll directory: " << p.string();
+      LOG_WARN << "Could not add dll directory: " << opts.engine_path.string();
     }
 
     if (auto cuda_cookie = AddDllDirectory(opts.cuda_path.c_str());
         cuda_cookie != 0) {
-      LOG_INFO << "Added cuda dll directory: " << p.string();
-      cookies_.push_back(cookie);
+      LOG_INFO << "Added cuda dll directory: " << opts.cuda_path.string();
+      cookies_.push_back(cuda_cookie);
     } else {
-      LOG_WARN << "Could not add cuda dll directory: " << p.string();
+      LOG_WARN << "Could not add cuda dll directory: "
+               << opts.cuda_path.string();
     }
   }
 #endif
 
   LOG_INFO << "Engine loaded successfully";
 }
 
+void LlamaEngine::Unload(EngineUnloadOption opts) {
+  LOG_INFO << "Unloading engine..";
+  LOG_DEBUG << "Unload dll: " << opts.unload_dll;
+
+  if (opts.unload_dll) {
+#if defined(_WIN32)
+    for (const auto& cookie : cookies_) {
+      if (!RemoveDllDirectory(cookie)) {
+        LOG_WARN << "Could not remove dll directory";
+      }
+    }
+#endif
+  }
+
+  LOG_INFO << "Engine unloaded successfully";
+}
+
 LlamaEngine::LlamaEngine(int log_option) {
   trantor::Logger::setLogLevel(trantor::Logger::kInfo);
   if (log_option == kFileLoggerOption) {
@@ -321,7 +359,6 @@ LlamaEngine::LlamaEngine(int log_option) {
 }
 
 LlamaEngine::~LlamaEngine() {
-  LOG_INFO << "Unloading engine..";
   for (auto& [_, si] : server_map_) {
     auto& l = si.ctx;
     l.ReleaseResources();
   }
   server_map_.clear();
 
   async_file_logger_.reset();
 
-#if defined(_WIN32)
-  for (const auto& cookie : cookies_) {
-    if (!RemoveDllDirectory(cookie)) {
-      LOG_WARN << "Could not remove dll directory";
-    }
-  }
-#endif
-  LOG_INFO << "Engine unloaded successfully";
+  LOG_INFO << "LlamaEngine destructed successfully";
 }
 
 void LlamaEngine::HandleChatCompletion(
diff --git a/src/llama_engine.h b/src/llama_engine.h
index 2d489c5..4e7d2f9 100644
--- a/src/llama_engine.h
+++ b/src/llama_engine.h
@@ -11,14 +11,18 @@
 
 class LlamaEngine : public EngineI {
  public:
-  constexpr auto kEngineName = "cortex.llamacpp";
+  constexpr static auto kEngineName = "cortex.llamacpp";
 
   LlamaEngine(int log_option = 0);
   ~LlamaEngine() final;
 
   // #### Interface ####
+  void RegisterLibraryPath(RegisterLibraryOption opts) final;
+  void Load(EngineLoadOption opts) final;
+  void Unload(EngineUnloadOption opts) final;
+
   void HandleChatCompletion(
       std::shared_ptr<Json::Value> jsonBody,
       std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;
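
Note on the new lifecycle: `RegisterLibraryPath` must run before the engine's dependencies are resolved (on Linux it prepends to `LD_LIBRARY_PATH`), `Load` configures logging and, on Windows, registers DLL search directories, and `Unload` optionally removes those directories; the destructor now only releases server contexts. A minimal sketch of a host driving this sequence, assuming the host obtains an `EngineI*` from the engine's exported entry point (the `MakeEngine()` below is a hypothetical stand-in, not part of this diff):

```cpp
#include <filesystem>
#include <memory>

#include "cortex-common/enginei.h"

// Hypothetical factory standing in for however the host resolves the
// engine's entry point (e.g. dlsym on an exported create function).
std::unique_ptr<EngineI> MakeEngine();

int main() {
  std::filesystem::path engine_dir = "/opt/cortex/engines/cortex.llamacpp";
  std::filesystem::path cuda_dir = "/opt/cortex/deps";

  auto engine = MakeEngine();

  // 1. Register dependency search paths before the engine's shared
  //    libraries are loaded; on Linux this prepends each path to
  //    LD_LIBRARY_PATH.
  EngineI::RegisterLibraryOption reg_opts;
  reg_opts.paths = {engine_dir, cuda_dir};
  engine->RegisterLibraryPath(reg_opts);

  // 2. Load: sets up the file logger and log level, and on Windows
  //    registers DLL directories via AddDllDirectory.
  EngineI::EngineLoadOption load_opts;
  load_opts.engine_path = engine_dir;
  load_opts.cuda_path = cuda_dir;
  load_opts.custom_engine_path = false;
  load_opts.log_path = "./logs/cortex.log";
  load_opts.max_log_lines = 100000;
  load_opts.log_level = trantor::Logger::kInfo;
  engine->Load(load_opts);

  // ... HandleChatCompletion / embedding calls go here ...

  // 3. Unload: on Windows, removes the AddDllDirectory cookies when
  //    unload_dll is true; context cleanup still happens in the destructor.
  EngineI::EngineUnloadOption unload_opts;
  unload_opts.unload_dll = true;
  engine->Unload(unload_opts);
}
```

Setting `unload_dll = false` lets a host tear an engine down while keeping the DLL search directories registered for a subsequent `Load`.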