diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2aba83a..573e5d2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,8 +21,6 @@ on: "!.gitignore", "!README.md", ] - pull_request: - types: [opened, synchronize, reopened] workflow_dispatch: env: diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml new file mode 100644 index 0000000..c7cc239 --- /dev/null +++ b/.github/workflows/quality-gate.yml @@ -0,0 +1,160 @@ +name: CI Quality Gate + +on: + pull_request: + types: [opened, synchronize, reopened] + workflow_dispatch: + +env: + LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf + EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf + +jobs: + build-and-test: + runs-on: ${{ matrix.runs-on }} + timeout-minutes: 40 + strategy: + matrix: + include: + - os: "linux" + name: "amd64-avx2" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_NATIVE=OFF" + run-e2e: true + vulkan: false + - os: "linux" + name: "amd64-avx" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF" + run-e2e: false + vulkan: false + - os: "linux" + name: "amd64-avx512" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF" + run-e2e: false + vulkan: false + - os: "linux" + name: "amd64-vulkan" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF" + run-e2e: false + vulkan: true + - os: "linux" + name: "amd64-cuda-11-7" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON" + run-e2e: false + vulkan: false + - os: "linux" + name: "amd64-cuda-12-0" + runs-on: "ubuntu-18-04-cuda-12-0" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON" + run-e2e: false + vulkan: false + - os: "mac" + name: "amd64" + runs-on: "macos-13" + cmake-flags: "" + run-e2e: true + vulkan: false + - os: "mac" + name: "arm64" + runs-on: 
"mac-silicon" + cmake-flags: "" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-avx2" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-avx" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-avx512" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-vulkan" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: true + - os: "windows" + name: "amd64-avx2-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx512-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx2-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: 
false + - os: "windows" + name: "amd64-avx-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx512-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Prepare Vulkan SDK + if: ${{ matrix.vulkan }} + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: 1.3.275.0 + vulkan-components: Vulkan-Headers, Vulkan-Loader + vulkan-use-cache: true + + - name: Build + run: | + make build-example-server CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" + + - name: Package + run: | + make package + + - name: Run e2e testing + if: ${{ matrix.run-e2e }} + run: | + make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }} + + - name: Upload Artifact + uses: actions/upload-artifact@v2 + with: + name: cortex.llamacpp-${{ matrix.os }}-${{ matrix.name }} + path: ./cortex.llamacpp diff --git a/Makefile b/Makefile index 5ec8616..b3df640 100644 --- a/Makefile +++ b/Makefile @@ -1,42 +1,89 @@ # Makefile for Cortex llamacpp engine - Build, Lint, Test, and Clean CMAKE_EXTRA_FLAGS ?= "" +RUN_TESTS ?= false +LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" +EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf" # Default target, does nothing all: @echo "Specify a target to run" # Build the Cortex engine -build: +build-lib: ifeq ($(OS),Windows_NT) - mkdir -p build - cd build; \ + @cmake -S ./third-party -B ./build_deps/third-party; + @cmake --build ./build_deps/third-party --config 
Release -j4; + @mkdir -p build; + @cd build; \ cmake .. $(CMAKE_EXTRA_FLAGS); \ cmake --build . --config Release; else ifeq ($(shell uname -s),Linux) - mkdir build && cd build; \ + @cmake -S ./third-party -B ./build_deps/third-party; + @make -C ./build_deps/third-party -j4; + @rm -rf ./build_deps/third-party; + @mkdir build && cd build; \ cmake .. $(CMAKE_EXTRA_FLAGS); \ - make -j$(nproc); + make -j4; else - mkdir build && cd build; \ + @cmake -S ./third-party -B ./build_deps/third-party + @make -C ./build_deps/third-party -j4 + @rm -rf ./build_deps/third-party + @mkdir build && cd build; \ cmake .. $(CMAKE_EXTRA_FLAGS); \ - make -j$(sysctl -n hw.ncpu); + make -j4; endif -code-sign: build +build-example-server: build-lib ifeq ($(OS),Windows_NT) - @echo "Hello Windows"; + @mkdir .\examples\server\build; \ + cd .\examples\server\build; \ + cmake .. $(CMAKE_EXTRA_FLAGS); \ + cmake --build . --config Release; else ifeq ($(shell uname -s),Linux) - @echo "Hello Linux"; + @mkdir -p examples/server/build && cd examples/server/build; \ + cmake .. $(CMAKE_EXTRA_FLAGS); \ + cmake --build . --config Release; else - @echo "Hello MacOS"; + @mkdir -p examples/server/build && cd examples/server/build; \ + cmake .. $(CMAKE_EXTRA_FLAGS); \ + cmake --build . 
--config Release; endif -package: build +package: +ifeq ($(OS),Windows_NT) + @mkdir -p cortex.llamacpp; \ + cp build\Release\engine.dll cortex.llamacpp\; \ + 7z a -ttar temp.tar cortex.llamacpp\*; \ + 7z a -tgzip cortex.llamacpp.tar.gz temp.tar +else ifeq ($(shell uname -s),Linux) + @mkdir -p cortex.llamacpp; \ + cp build/libengine.so cortex.llamacpp/; \ + tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp; +else + @mkdir -p cortex.llamacpp; \ + cp build/libengine.dylib cortex.llamacpp/; \ + tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp; +endif + +run-e2e-test: +ifeq ($(RUN_TESTS),false) + @echo "Skipping tests" + @exit 0 +endif ifeq ($(OS),Windows_NT) - @echo "Hello Windows"; + @mkdir examples\server\build\Release\engines\cortex.llamacpp; \ + cd examples\server\build\Release; \ + cp ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp; \ + ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); else ifeq ($(shell uname -s),Linux) - @echo "Hello Linux"; + @mkdir -p examples/server/build/engines/cortex.llamacpp; \ + cd examples/server/build/; \ + cp ../../../build/libengine.so engines/cortex.llamacpp/; \ + chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); else - @echo "Hello MacOS"; + @mkdir -p examples/server/build/engines/cortex.llamacpp; \ + cd examples/server/build/; \ + cp ../../../build/libengine.dylib engines/cortex.llamacpp/; \ + chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); endif \ No newline at end of file diff --git a/configure.bat b/configure.bat deleted file mode 100755 index 7f24733..0000000 --- a/configure.bat +++ /dev/null @@ -1,2 +0,0 @@ -cmake -S ./third-party -B ./build_deps/third-party -cmake --build ./build_deps/third-party 
--config Release -j %NUMBER_OF_PROCESSORS% \ No newline at end of file diff --git a/configure.sh b/configure.sh deleted file mode 100755 index 842bbb2..0000000 --- a/configure.sh +++ /dev/null @@ -1,3 +0,0 @@ -cmake -S ./third-party -B ./build_deps/third-party -make -C ./build_deps/third-party -j 10 -rm -rf ./build_deps/third-party \ No newline at end of file