feat: e2e testing for embedding and completion endpoints #8

Merged
merged 4 commits on May 2, 2024
178 changes: 178 additions & 0 deletions .github/scripts/e2e-test-server-linux-and-mac.sh
@@ -0,0 +1,178 @@
#!/bin/bash

## Example run command
# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/server/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
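# Note: a third argument, the download URL for the embedding model, is also required (see the usage check below).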

# Check for required arguments
if [[ $# -ne 3 ]]; then
echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
exit 1
fi

rm -f /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/server.log

BINARY_PATH=$1
DOWNLOAD_LLM_URL=$2
DOWNLOAD_EMBEDDING_URL=$3

# Random port to ensure it's not used
min=10000
max=11000
range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Start the binary file
"$BINARY_PATH" 127.0.0.1 $PORT >/tmp/server.log &

# Get the process id of the binary file
pid=$!

if ! ps -p $pid >/dev/null; then
echo "server failed to start. Logs:"
cat /tmp/server.log
exit 1
fi

# Wait for a few seconds to let the server start
sleep 5

# Check if /tmp/testllm exists, if not, download it
if [[ ! -f "/tmp/testllm" ]]; then
curl --connect-timeout 300 $DOWNLOAD_LLM_URL --output /tmp/testllm
fi

# Check if /tmp/test-embedding exists, if not, download it
if [[ ! -f "/tmp/test-embedding" ]]; then
curl --connect-timeout 300 $DOWNLOAD_EMBEDDING_URL --output /tmp/test-embedding
fi

# Run the curl commands
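# Each curl below writes the response body to a log file with -o and prints only the HTTP status code (-w),
# which is captured in the responseN variables and checked further down.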
response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/loadmodel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm",
"ctx_len": 50,
"ngl": 32,
"embedding": false
}')

if ! ps -p $pid >/dev/null; then
echo "server failed to load model. Logs:"
cat /tmp/server.log
exit 1
fi

response2=$(
curl --connect-timeout 60 -o /tmp/completion-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
--header 'Content-Type: application/json' \
--header 'Accept: text/event-stream' \
--header 'Access-Control-Allow-Origin: *' \
--data '{
"messages": [
{"content": "Hello there", "role": "assistant"},
{"content": "Write a long and sad story for me", "role": "user"}
],
"stream": true,
"model": "gpt-3.5-turbo",
"max_tokens": 50,
"stop": ["hello"],
"frequency_penalty": 0,
"presence_penalty": 0,
"temperature": 0.1
}'
)

# unload model
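# Note: /unloadmodel is requested with GET while the model path is still passed in the JSON body.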
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/unloadmodel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm"
}')

# load embedding model
response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/loadmodel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/test-embedding",
"ctx_len": 50,
"ngl": 32,
"embedding": true,
"model_type": "embedding"
}')

# request embedding
response5=$(
curl --connect-timeout 60 -o /tmp/embedding-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \
--header 'Content-Type: application/json' \
--header 'Accept: text/event-stream' \
--header 'Access-Control-Allow-Origin: *' \
--data '{
"input": "Hello",
"model": "test-embedding",
"encoding_format": "float"
}'
)

error_occurred=0
if [[ "$response1" -ne 200 ]]; then
echo "The load llm model curl command failed with status code: $response1"
cat /tmp/load-llm-model-res.log
error_occurred=1
fi

if [[ "$response2" -ne 200 ]]; then
echo "The completion curl command failed with status code: $response2"
cat /tmp/completion-res.log
error_occurred=1
fi

if [[ "$response3" -ne 200 ]]; then
echo "The unload model curl command failed with status code: $response3"
cat /tmp/unload-model-res.log
error_occurred=1
fi

if [[ "$response4" -ne 200 ]]; then
echo "The load embedding model curl command failed with status code: $response4"
cat /tmp/load-embedding-model-res.log
error_occurred=1
fi

if [[ "$response5" -ne 200 ]]; then
echo "The embedding curl command failed with status code: $response5"
cat /tmp/embedding-res.log
error_occurred=1
fi

if [[ "$error_occurred" -eq 1 ]]; then
echo "Server test run failed!!!!!!!!!!!!!!!!!!!!!!"
echo "Server Error Logs:"
cat /tmp/server.log
kill $pid
exit 1
fi

echo "----------------------"
echo "Log load model:"
cat /tmp/load-llm-model-res.log

echo "----------------------"
echo "Log run test:"
cat /tmp/completion-res.log

echo "----------------------"
echo "Log run test:"
cat /tmp/unload-model-res.log

echo "----------------------"
echo "Log run test:"
cat /tmp/load-embedding-model-res.log

echo "----------------------"
echo "Log run test:"
cat /tmp/embedding-res.log

echo "Server test run successfully!"

# Kill the server process
kill $pid
165 changes: 165 additions & 0 deletions .github/scripts/e2e-test-server-windows.bat
@@ -0,0 +1,165 @@
@echo off

set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
set "MODEL_LLM_PATH=%TEMP%\testllm"
set "MODEL_EMBEDDING_PATH=%TEMP%\test-embedding"

rem Check for required arguments
if "%~3"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download_llm^> ^<url_to_download_embedding^>
exit /b 1
)

set "BINARY_PATH=%~1"
set "DOWNLOAD_LLM_URL=%~2"
set "DOWNLOAD_EMBEDDING_URL=%~3"

for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

del %TEMP%\response1.log 2>nul
del %TEMP%\response2.log 2>nul
del %TEMP%\response3.log 2>nul
del %TEMP%\response4.log 2>nul
del %TEMP%\response5.log 2>nul
del %TEMP%\server.log 2>nul

rem Random port to ensure it's not used
set /a min=9999
set /a max=11000
set /a range=max-min+1
set /a PORT=%min% + %RANDOM% %% %range%

rem Start the binary file
start /B "" "%BINARY_PATH%" "127.0.0.1" %PORT% > %TEMP%\server.log 2>&1

rem Ping localhost as a rough five-second delay so the server has time to start
ping -n 6 127.0.0.1 > nul

rem Capture the PID of the started process with "server" in its name
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
echo server failed to start. Logs:
type %TEMP%\server.log
exit /b 1
)

rem Check if the LLM model exists; if not, download it
if not exist "%MODEL_LLM_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_LLM_URL% --output "%MODEL_LLM_PATH%"
)

if not exist "%MODEL_EMBEDDING_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_EMBEDDING_URL% --output "%MODEL_EMBEDDING_PATH%"
)

rem Define JSON strings for curl data
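rem The call set lines below double every backslash in the model paths so they are valid JSON string values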
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"

rem Print the values of curl_data for debugging
echo curl_data1=%curl_data1%
echo curl_data2=%curl_data2%
echo curl_data3=%curl_data3%
echo curl_data4=%curl_data4%
echo curl_data5=%curl_data5%

rem Run the curl commands and capture the status code
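rem Each curl writes its response body (-o) and its -w HTTP status code (via stdout redirection) to the same response log;
rem the status code is read back from that log further down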
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/loadmodel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/chat/completions" ^
--header "Content-Type: application/json" ^
--data "%curl_data2%" > %TEMP%\response2.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/unloadmodel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/loadmodel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
--header "Content-Type: application/json" ^
--data "%curl_data5%" > %TEMP%\response5.log 2>&1

set "error_occurred=0"

rem Read the status codes from the log files
for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
for /f %%a in (%TEMP%\response3.log) do set "response3=%%a"
for /f %%a in (%TEMP%\response4.log) do set "response4=%%a"
for /f %%a in (%TEMP%\response5.log) do set "response5=%%a"

if "%response1%" neq "200" (
echo The load llm model curl command failed with status code: %response1%
type %TEMP%\response1.log
set "error_occurred=1"
)

if "%response2%" neq "200" (
echo The completion curl command failed with status code: %response2%
type %TEMP%\response2.log
set "error_occurred=1"
)

if "%response3%" neq "200" (
echo The unload model curl command failed with status code: %response3%
type %TEMP%\response3.log
set "error_occurred=1"
)

if "%response4%" neq "200" (
echo The load embedding model curl command failed with status code: %response4%
type %TEMP%\response4.log
set "error_occurred=1"
)

if "%response5%" neq "200" (
echo The embedding curl command failed with status code: %response5%
type %TEMP%\response5.log
set "error_occurred=1"
)

if "%error_occurred%"=="1" (
echo Server test run failed!!!!!!!!!!!!!!!!!!!!!!
echo Server Error Logs:
type %TEMP%\server.log
taskkill /f /pid %pid%
exit /b 1
)


echo ----------------------
echo Log load llm model:
type %TEMP%\response1.log

echo ----------------------
echo Log completion test:
type %TEMP%\response2.log

echo ----------------------
echo Log unload model:
type %TEMP%\response3.log

echo ----------------------
echo Log load embedding model:
type %TEMP%\response4.log

echo ----------------------
echo Log run embedding test:
type %TEMP%\response5.log

echo Server test ran successfully!

rem Kill the server process
@REM taskkill /f /pid %pid%
taskkill /f /im server.exe 2>nul || exit /B 0