feat: e2e testing for embedding and completion endpoints #44

Workflow file for this run

name: CI
on:
  pull_request:
    types: [opened, synchronize, reopened]
  workflow_dispatch:
env:
  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
  EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
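  # These model URLs are handed to the e2e test scripts invoked below, which
  # (presumably) download them and exercise the completion and embedding
  # endpoints of the example server.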
jobs:
  ubuntu-amd64-build:
    runs-on: ubuntu-18-04-cuda-11-7
    timeout-minutes: 40
    strategy:
      matrix:
        include:
          - build: "amd64-avx2"
            defines: "-DLLAMA_NATIVE=OFF"
          - build: "amd64-avx"
            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
          - build: "amd64-avx512"
            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
          - build: "amd64-vulkan"
            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Prepare Vulkan SDK
        if: ${{ matrix.build == 'amd64-vulkan' }}
        uses: humbletim/setup-vulkan-sdk@v1.2.0
        with:
          vulkan-query-version: 1.3.275.0
          vulkan-components: Vulkan-Headers, Vulkan-Loader
          vulkan-use-cache: true
      - name: Build library
        run: |
          ./configure.sh
          make build CMAKE_EXTRA_FLAGS="${{ matrix.defines }}"
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release
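      # The step below assumes the server resolves engine libraries from
      # engines/cortex.llamacpp relative to its working directory; the vulkan
      # and avx512 variants are skipped, presumably because the runner cannot
      # execute them.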
      - name: Run e2e testing
        shell: bash
        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
        run: |
          mkdir -p examples/server/build/engines/cortex.llamacpp
          cd examples/server/build/
          cp ../../../build/libengine.so engines/cortex.llamacpp/
          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  ubuntu-amd64-cuda-build:
    runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }}
    timeout-minutes: 40
    strategy:
      matrix:
        cuda: ["12-0", "11-7"]
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        run: |
          ./configure.sh
          make build CMAKE_EXTRA_FLAGS="-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON
          cmake --build . --config Release
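  # The CUDA jobs (this one and windows-amd64-cuda-build below) stop at the
  # build stage; no e2e step runs for them in this workflow.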
  macOS-silicon-build:
    runs-on: mac-silicon
    timeout-minutes: 40
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        run: |
          ./configure.sh
          make build
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake ..
          cmake --build . --config Release
      - name: Run e2e testing
        shell: bash
        run: |
          mkdir -p examples/server/build/engines/cortex.llamacpp
          cd examples/server/build/
          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  macOS-amd64-build:
    runs-on: macos-13
    timeout-minutes: 40
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        id: cmake_build
        run: |
          ./configure.sh
          make build
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake ..
          cmake --build . --config Release
      - name: Run e2e testing
        shell: bash
        run: |
          mkdir -p examples/server/build/engines/cortex.llamacpp
          cd examples/server/build/
          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  windows-amd64-build:
    runs-on: windows-latest
    timeout-minutes: 40
    strategy:
      matrix:
        include:
          - build: "amd64-avx2"
            defines: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - build: "amd64-avx"
            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - build: "amd64-avx512"
            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - build: "amd64-vulkan"
            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: install make-gnu
        run: |
          choco install make -y
      - name: Prepare Vulkan SDK
        uses: humbletim/setup-vulkan-sdk@v1.2.0
        if: ${{ matrix.build == 'amd64-vulkan' }}
        with:
          vulkan-query-version: 1.3.275.0
          vulkan-components: Vulkan-Headers, Vulkan-Loader
          vulkan-use-cache: true
      - name: Build library
        shell: cmd
        run: |
          cmake -S ./third-party -B ./build_deps/third-party
          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
          mkdir -p build
          cd build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release
      - name: Build server example
        shell: cmd
        run: |
          mkdir .\examples\server\build
          cd .\examples\server\build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release
      - name: Run e2e testing
        shell: cmd
        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
        run: |
          mkdir examples\server\build\Release\engines\cortex.llamacpp
          cd examples\server\build\Release
          copy ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp\
          ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  windows-amd64-cuda-build:
    runs-on: windows-cuda-${{ matrix.cuda }}
    timeout-minutes: 40
    strategy:
      matrix:
        include:
          - cuda: "12-0"
            instructions: "-DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "12-0"
            instructions: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "12-0"
            instructions: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "11-7"
            instructions: "-DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "11-7"
            instructions: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "11-7"
            instructions: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        shell: cmd
        run: |
          cmake -S ./third-party -B ./build_deps/third-party
          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
          mkdir -p build
          cd build
          cmake .. ${{ matrix.instructions }} ${{ matrix.flags }}
          cmake --build . --config Release
      - name: Build server example
        shell: cmd
        run: |
          mkdir .\examples\server\build
          cd .\examples\server\build
          cmake .. ${{ matrix.instructions }} ${{ matrix.flags }}
          cmake --build . --config Release
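
The endpoint checks themselves live in .github/scripts/e2e-test-server-linux-and-mac.sh and .github/scripts/e2e-test-server-windows.bat, which are not shown on this page. The sketch below is only an illustration of the shape such a script could take; the port, endpoint paths, request payloads, and the way the server binary is launched are assumptions, not confirmed details of this project.

#!/bin/bash
# Illustrative sketch only -- not the script referenced by the workflow above.
# $1 = path to the server binary, $2 = LLM model URL, $3 = embedding model URL
set -e

BINARY_PATH=$1
LLM_MODEL_URL=$2
EMBEDDING_MODEL_URL=$3

HOST=127.0.0.1
PORT=3928   # assumed port

# Download the two test models named in the workflow's env block.
curl -sSL -o /tmp/test-llm.gguf "$LLM_MODEL_URL"
curl -sSL -o /tmp/test-embedding.gguf "$EMBEDDING_MODEL_URL"

# Start the server in the background; how it takes host/port is an assumption.
"$BINARY_PATH" "$HOST" "$PORT" &
SERVER_PID=$!
trap 'kill "$SERVER_PID" 2>/dev/null || true' EXIT
sleep 5

# The real script presumably also loads each downloaded model through the
# server's model-loading API before hitting the endpoints below.

# Completion endpoint check (path and payload are assumptions).
curl -sf "http://$HOST:$PORT/v1/completions" \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello", "max_tokens": 8}'

# Embedding endpoint check (path and payload are assumptions).
curl -sf "http://$HOST:$PORT/v1/embeddings" \
  -H "Content-Type: application/json" \
  -d '{"input": "Hello"}'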