diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2aba83a..573e5d2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,8 +21,6 @@ on: "!.gitignore", "!README.md", ] - pull_request: - types: [opened, synchronize, reopened] workflow_dispatch: env: diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml new file mode 100644 index 0000000..c7cc239 --- /dev/null +++ b/.github/workflows/quality-gate.yml @@ -0,0 +1,160 @@ +name: CI Quality Gate + +on: + pull_request: + types: [opened, synchronize, reopened] + workflow_dispatch: + +env: + LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf + EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf + +jobs: + build-and-test: + runs-on: ${{ matrix.runs-on }} + timeout-minutes: 40 + strategy: + matrix: + include: + - os: "linux" + name: "amd64-avx2" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_NATIVE=OFF" + run-e2e: true + vulkan: false + - os: "linux" + name: "amd64-avx" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF" + run-e2e: false + vulkan: false + - os: "linux" + name: "amd64-avx512" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF" + run-e2e: false + vulkan: false + - os: "linux" + name: "amd64-vulkan" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF" + run-e2e: false + vulkan: true + - os: "linux" + name: "amd64-cuda-11-7" + runs-on: "ubuntu-18-04-cuda-11-7" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON" + run-e2e: false + vulkan: false + - os: "linux" + name: "amd64-cuda-12-0" + runs-on: "ubuntu-18-04-cuda-12-0" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON" + run-e2e: false + vulkan: false + - os: "mac" + name: "amd64" + runs-on: "macos-13" + cmake-flags: "" + run-e2e: true + vulkan: false + - os: "mac" + name: "arm64" + runs-on: 
"mac-silicon" + cmake-flags: "" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-avx2" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-avx" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-avx512" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: false + - os: "windows" + name: "amd64-vulkan" + runs-on: "windows-latest" + cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: true + vulkan: true + - os: "windows" + name: "amd64-avx2-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx512-cuda-12-0" + runs-on: "windows-cuda-12-0" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx2-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: 
false + - os: "windows" + name: "amd64-avx-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + - os: "windows" + name: "amd64-avx512-cuda-11-7" + runs-on: "windows-cuda-11-7" + cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE" + run-e2e: false + vulkan: false + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Prepare Vulkan SDK + if: ${{ matrix.vulkan }} + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: 1.3.275.0 + vulkan-components: Vulkan-Headers, Vulkan-Loader + vulkan-use-cache: true + + - name: Build + run: | + make build-example-server CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" + + - name: Package + run: | + make package + + - name: Run e2e testing + if: ${{ matrix.run-e2e }} + run: | + make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }} + + - name: Upload Artifact + uses: actions/upload-artifact@v2 + with: + name: cortex.llamacpp-${{ matrix.os }}-${{ matrix.name }} + path: ./cortex.llamacpp diff --git a/Makefile b/Makefile index 5ec8616..b3df640 100644 --- a/Makefile +++ b/Makefile @@ -1,42 +1,89 @@ # Makefile for Cortex llamacpp engine - Build, Lint, Test, and Clean CMAKE_EXTRA_FLAGS ?= "" +RUN_TESTS ?= false +LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" +EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf" # Default target, does nothing all: @echo "Specify a target to run" # Build the Cortex engine -build: +build-lib: ifeq ($(OS),Windows_NT) - mkdir -p build - cd build; \ + @cmake -S ./third-party -B ./build_deps/third-party; + @cmake --build ./build_deps/third-party --config 
Release -j4; + @mkdir -p build; + @cd build; \ cmake .. $(CMAKE_EXTRA_FLAGS); \ cmake --build . --config Release; else ifeq ($(shell uname -s),Linux) - mkdir build && cd build; \ + @cmake -S ./third-party -B ./build_deps/third-party; + @make -C ./build_deps/third-party -j4; + @rm -rf ./build_deps/third-party; + @mkdir build && cd build; \ cmake .. $(CMAKE_EXTRA_FLAGS); \ - make -j$(nproc); + make -j4; else - mkdir build && cd build; \ + @cmake -S ./third-party -B ./build_deps/third-party + @make -C ./build_deps/third-party -j4 + @rm -rf ./build_deps/third-party + @mkdir build && cd build; \ cmake .. $(CMAKE_EXTRA_FLAGS); \ - make -j$(sysctl -n hw.ncpu); + make -j4; endif -code-sign: build +build-example-server: build-lib ifeq ($(OS),Windows_NT) - @echo "Hello Windows"; + @mkdir .\examples\server\build; \ + cd .\examples\server\build; \ + cmake .. $(CMAKE_EXTRA_FLAGS); \ + cmake --build . --config Release; else ifeq ($(shell uname -s),Linux) - @echo "Hello Linux"; + @mkdir -p examples/server/build && cd examples/server/build; \ + cmake .. $(CMAKE_EXTRA_FLAGS); \ + cmake --build . --config Release; else - @echo "Hello MacOS"; + @mkdir -p examples/server/build && cd examples/server/build; \ + cmake .. $(CMAKE_EXTRA_FLAGS); \ + cmake --build . 
--config Release; endif -package: build +package: +ifeq ($(OS),Windows_NT) + @mkdir -p cortex.llamacpp; \ + cp build\Release\engine.dll cortex.llamacpp\; \ + 7z a -ttar temp.tar cortex.llamacpp\*; \ + 7z a -tgzip cortex.llamacpp.tar.gz temp.tar +else ifeq ($(shell uname -s),Linux) + @mkdir -p cortex.llamacpp; \ + cp build/libengine.so cortex.llamacpp/; \ + tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp; +else + @mkdir -p cortex.llamacpp; \ + cp build/libengine.dylib cortex.llamacpp/; \ + tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp; +endif + +run-e2e-test: +ifeq ($(RUN_TESTS),false) + @echo "Skipping tests" + @exit 0 +endif ifeq ($(OS),Windows_NT) - @echo "Hello Windows"; + @mkdir examples\server\build\Release\engines\cortex.llamacpp; \ + cd examples\server\build\Release; \ + cp ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp; \ + ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); else ifeq ($(shell uname -s),Linux) - @echo "Hello Linux"; + @mkdir -p examples/server/build/engines/cortex.llamacpp; \ + cd examples/server/build/; \ + cp ../../../build/libengine.so engines/cortex.llamacpp/; \ + chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); else - @echo "Hello MacOS"; + @mkdir -p examples/server/build/engines/cortex.llamacpp; \ + cd examples/server/build/; \ + cp ../../../build/libengine.dylib engines/cortex.llamacpp/; \ + chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); endif \ No newline at end of file diff --git a/configure.bat b/configure.bat deleted file mode 100755 index 7f24733..0000000 --- a/configure.bat +++ /dev/null @@ -1,2 +0,0 @@ -cmake -S ./third-party -B ./build_deps/third-party -cmake --build ./build_deps/third-party 
--config Release -j %NUMBER_OF_PROCESSORS% \ No newline at end of file diff --git a/configure.sh b/configure.sh deleted file mode 100755 index 842bbb2..0000000 --- a/configure.sh +++ /dev/null @@ -1,3 +0,0 @@ -cmake -S ./third-party -B ./build_deps/third-party -make -C ./build_deps/third-party -j 10 -rm -rf ./build_deps/third-party \ No newline at end of file