feat: e2e testing for embedding and completion endpoints #44

Workflow file for this run

name: CI
on:
  pull_request:
    types: [opened, synchronize, reopened]
  workflow_dispatch:
env:
  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
  EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
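  # These model URLs are handed to the e2e test scripts invoked below, which
  # (presumably) download them and exercise the completion and embedding
  # endpoints of the example server.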
jobs:
  ubuntu-amd64-build:
    runs-on: ubuntu-18-04-cuda-11-7
    timeout-minutes: 40
    strategy:
      matrix:
        include:
          - build: "amd64-avx2"
            defines: "-DLLAMA_NATIVE=OFF"
          - build: "amd64-avx"
            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
          - build: "amd64-avx512"
            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
          - build: "amd64-vulkan"
            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Prepare Vulkan SDK
        if: ${{ matrix.build == 'amd64-vulkan' }}
        uses: humbletim/setup-vulkan-sdk@v1.2.0
        with:
          vulkan-query-version: 1.3.275.0
          vulkan-components: Vulkan-Headers, Vulkan-Loader
          vulkan-use-cache: true
      - name: Build library
        run: |
          ./configure.sh
          make build CMAKE_EXTRA_FLAGS="${{ matrix.defines }}"
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release
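      # The step below assumes the server resolves engine libraries from
      # engines/cortex.llamacpp relative to its working directory; the vulkan
      # and avx512 variants are skipped, presumably because the runner cannot
      # execute them.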
      - name: Run e2e testing
        shell: bash
        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
        run: |
          mkdir -p examples/server/build/engines/cortex.llamacpp
          cd examples/server/build/
          cp ../../../build/libengine.so engines/cortex.llamacpp/
          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  ubuntu-amd64-cuda-build:
    runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }}
    timeout-minutes: 40
    strategy:
      matrix:
        cuda: ["12-0", "11-7"]
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        run: |
          ./configure.sh
          make build CMAKE_EXTRA_FLAGS="-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON
          cmake --build . --config Release
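  # The CUDA jobs (this one and windows-amd64-cuda-build below) stop at the
  # build stage; no e2e step runs for them in this workflow.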
  macOS-silicon-build:
    runs-on: mac-silicon
    timeout-minutes: 40
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        run: |
          ./configure.sh
          make build
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake ..
          cmake --build . --config Release
      - name: Run e2e testing
        shell: bash
        run: |
          mkdir -p examples/server/build/engines/cortex.llamacpp
          cd examples/server/build/
          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  macOS-amd64-build:
    runs-on: macos-13
    timeout-minutes: 40
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        id: cmake_build
        run: |
          ./configure.sh
          make build
      - name: Build server example
        run: |
          mkdir -p examples/server/build
          cd examples/server/build
          cmake ..
          cmake --build . --config Release
      - name: Run e2e testing
        shell: bash
        run: |
          mkdir -p examples/server/build/engines/cortex.llamacpp
          cd examples/server/build/
          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  windows-amd64-build:
    runs-on: windows-latest
    timeout-minutes: 40
    strategy:
      matrix:
        include:
          - build: "amd64-avx2"
            defines: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - build: "amd64-avx"
            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - build: "amd64-avx512"
            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - build: "amd64-vulkan"
            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: install make-gnu
        run: |
          choco install make -y
      - name: Prepare Vulkan SDK
        uses: humbletim/setup-vulkan-sdk@v1.2.0
        if: ${{ matrix.build == 'amd64-vulkan' }}
        with:
          vulkan-query-version: 1.3.275.0
          vulkan-components: Vulkan-Headers, Vulkan-Loader
          vulkan-use-cache: true
      - name: Build library
        shell: cmd
        run: |
          cmake -S ./third-party -B ./build_deps/third-party
          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
          mkdir -p build
          cd build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release
      - name: Build server example
        shell: cmd
        run: |
          mkdir .\examples\server\build
          cd .\examples\server\build
          cmake .. ${{ matrix.defines }}
          cmake --build . --config Release
      - name: Run e2e testing
        shell: cmd
        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
        run: |
          mkdir examples\server\build\Release\engines\cortex.llamacpp
          cd examples\server\build\Release
          copy ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp\
          ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
  windows-amd64-cuda-build:
    runs-on: windows-cuda-${{ matrix.cuda }}
    timeout-minutes: 40
    strategy:
      matrix:
        include:
          - cuda: "12-0"
            instructions: "-DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "12-0"
            instructions: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "12-0"
            instructions: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "11-7"
            instructions: "-DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "11-7"
            instructions: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
          - cuda: "11-7"
            instructions: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
            flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Build library
        shell: cmd
        run: |
          cmake -S ./third-party -B ./build_deps/third-party
          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
          mkdir -p build
          cd build
          cmake .. ${{ matrix.instructions }} ${{ matrix.flags }}
          cmake --build . --config Release
      - name: Build server example
        shell: cmd
        run: |
          mkdir .\examples\server\build
          cd .\examples\server\build
          cmake .. ${{ matrix.instructions }} ${{ matrix.flags }}
          cmake --build . --config Release
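
The endpoint checks themselves live in .github/scripts/e2e-test-server-linux-and-mac.sh and .github/scripts/e2e-test-server-windows.bat, which are not shown on this page. The sketch below is only an illustration of the shape such a script could take; the port, endpoint paths, request payloads, and the way the server binary is launched are assumptions, not confirmed details of this project.

#!/bin/bash
# Illustrative sketch only -- not the script referenced by the workflow above.
# $1 = path to the server binary, $2 = LLM model URL, $3 = embedding model URL
set -e

BINARY_PATH=$1
LLM_MODEL_URL=$2
EMBEDDING_MODEL_URL=$3

HOST=127.0.0.1
PORT=3928   # assumed port

# Download the two test models named in the workflow's env block.
curl -sSL -o /tmp/test-llm.gguf "$LLM_MODEL_URL"
curl -sSL -o /tmp/test-embedding.gguf "$EMBEDDING_MODEL_URL"

# Start the server in the background; how it takes host/port is an assumption.
"$BINARY_PATH" "$HOST" "$PORT" &
SERVER_PID=$!
trap 'kill "$SERVER_PID" 2>/dev/null || true' EXIT
sleep 5

# The real script presumably also loads each downloaded model through the
# server's model-loading API before hitting the endpoints below.

# Completion endpoint check (path and payload are assumptions).
curl -sf "http://$HOST:$PORT/v1/completions" \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello", "max_tokens": 8}'

# Embedding endpoint check (path and payload are assumptions).
curl -sf "http://$HOST:$PORT/v1/embeddings" \
  -H "Content-Type: application/json" \
  -d '{"input": "Hello"}'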