Skip to content

Fix inference regression, private model loading, and catch the timeout error when waiting for Vespa converge #2581

Fix inference regression, private model loading, and catch the timeout error when waiting for Vespa converge

Fix inference regression, private model loading, and catch the timeout error when waiting for Vespa converge #2581

name: unit_test_200gb_CI
# runs unit tests on AMD64 machine
on:
workflow_call:
workflow_dispatch:
push:
branches:
- mainline
- releases/*
pull_request:
branches:
- mainline
- releases/*
concurrency:
group: unit-tests-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
Start-Runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-latest
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Start EC2 runner
id: start-ec2-runner
uses: machulav/ec2-github-runner@v2
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}
ec2-instance-type: m6i.xlarge
subnet-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SUBNET_ID }}
security-group-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SECURITY_GROUP_ID }}
aws-resource-tags: > # optional, requires additional permissions
[
{"Key": "Name", "Value": "marqo-github-runner-${{ github.run_id }}"},
{"Key": "GitHubRepo", "Value": "${{ github.repository }}"},
{"Key": "WorkflowName", "Value": "${{ github.workflow }}"},
{"Key": "WorkflowRunId", "Value": "${{ github.run_id }}"},
{"Key": "WorlflowURL", "Value": "${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}"},
{"Key": "PoloRole", "Value": "testing"}
]
Test-Marqo:
name: Run Unit Tests
needs: Start-Runner # required to start the main job when the runner is ready
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
environment: marqo-test-suite
steps:
- name: Checkout marqo repo
uses: actions/checkout@v3
with:
fetch-depth: 0
path: marqo
- name: Set up Python 3.8
uses: actions/setup-python@v3
with:
python-version: "3.8"
cache: "pip"
- name: Checkout marqo-base for requirements
uses: actions/checkout@v3
with:
repository: marqo-ai/marqo-base
path: marqo-base
- name: Install dependencies
run: |
pip install -r marqo-base/requirements.txt
# override base requirements with marqo requirements, if needed:
pip install -r marqo/requirements.dev.txt
- name: Build Vespa
run: |
systemctl stop unattended-upgrades
apt-get remove -y unattended-upgrades
# Function to wait for the dpkg lock to be released
function wait_for_dpkg_lock() {
while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
echo "Waiting for the dpkg lock to be released..."
sleep 5
done
}
# Wait for the dpkg lock before updating and installing
wait_for_dpkg_lock
echo "Updating package list"
apt-get update -y
# Build Vespa components
echo "Installing jdk 17"
sudo apt-get install openjdk-17-jdk -y
echo "Installing maven"
sudo apt-get install maven -y
echo "Building Vespa components"
cd marqo/vespa
mvn clean package
- name: Start Vespa
run: |
# Define these for checking if Vespa is ready
export VESPA_CONFIG_URL=http://localhost:19071
export VESPA_DOCUMENT_URL=http://localhost:8080
export VESPA_QUERY_URL=http://localhost:8080
cd marqo/scripts/vespa_local
set -x
python vespa_local.py start
set +x
echo "Waiting for Vespa to start"
for i in {1..20}; do
echo -ne "Waiting... $i seconds\r"
sleep 1
done
echo -e "\nDone waiting."
# Zip up schemas and services
sudo apt-get install zip -y
zip -r vespa_tester_app.zip services.xml schemas
# Deploy application with test schema
curl --header "Content-Type:application/zip" --data-binary @vespa_tester_app.zip http://localhost:19071/application/v2/tenant/default/prepareandactivate
# wait for vespa to start (document url):
timeout 10m bash -c 'until curl -f -X GET $VESPA_DOCUMENT_URL >/dev/null 2>&1; do echo " Waiting for Vespa document API to be available..."; sleep 10; done;' || \
(echo "Vespa (Document URL) did not start in time" && exit 1)
echo "Vespa document API is available. Local Vespa setup complete."
# Delete the zip file
rm vespa_tester_app.zip
echo "Deleted vespa_tester_app.zip"
- name: Run Unit Tests
run: |
# Define these for use by marqo
export VESPA_CONFIG_URL=http://localhost:19071
export VESPA_DOCUMENT_URL=http://localhost:8080
export VESPA_QUERY_URL=http://localhost:8080
cd marqo
export PYTHONPATH="./tests:./src:."
pytest --ignore=tests/test_documentation.py --durations=100 --cov=src --cov-branch --cov-context=test --cov-report=html:cov_html --cov-report=lcov:lcov.info tests
- name: Upload Test Report
uses: actions/upload-artifact@v4
with:
name: marqo-test-report
path: marqo/cov_html/
Stop-Runner:
name: Stop self-hosted EC2 runner
needs:
- Start-Runner # required to get output from the start-runner job
- Test-Marqo # required to wait when the main job is done
runs-on: ubuntu-latest
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
- name: Stop EC2 runner
uses: machulav/ec2-github-runner@v2
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}