[WIP] Add PR-based public beta system for GPU instances #3032
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI entry point: tests and builds the project for pull requests, pushes to
# main, a nightly schedule, and manual dispatches.
name: Test And Build

on:
  pull_request:
    # ready_for_review is listed so a run starts when a draft PR is marked
    # ready (the test-and-build job skips draft PRs).
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
    branches: [main]
  push:
    branches: [main]
  schedule:
    # Nightly at 02:00 UTC
    - cron: '0 2 * * *'
  # Manual runs from the Actions tab or the API
  workflow_dispatch: {}
# One active run per workflow and ref. Only superseded pull-request runs are
# cancelled: pushes to main and the nightly schedule share refs/heads/main, so
# unconditional cancellation would let a push abort an in-flight nightly run
# together with the Docker publish job that depends on it.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
  # Change filter: its outputs (core, helm, e2e, docker, make, ci) gate the
  # test-and-build job below.
  changes:
    uses: ./.github/workflows/ci-changes.yml

  # Builds the Rust library, starts Milvus and Redis, and runs the test suite.
  test-and-build:
    needs: changes
    # Draft PRs are skipped. Nightly and manual runs always execute; all other
    # events run only when the change filter reports a relevant area.
    if: >-
      ${{ !github.event.pull_request.draft
      && (github.event_name == 'schedule'
      || github.event_name == 'workflow_dispatch'
      || needs.changes.outputs.core == 'true'
      || needs.changes.outputs.helm == 'true'
      || needs.changes.outputs.e2e == 'true'
      || needs.changes.outputs.docker == 'true'
      || needs.changes.outputs.make == 'true'
      || needs.changes.outputs.ci == 'true') }}
    runs-on: ubuntu-latest
    outputs:
      # UTC date (YYYYMMDD) used by publish-docker to build the nightly tag.
      nightly_date: ${{ steps.nightly-date.outputs.date }}
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      # `with:` inputs of a reusable workflow are not shell-evaluated, so the
      # date for the nightly tag has to be produced by a step.
      - name: Compute nightly date tag
        id: nightly-date
        run: |
          echo "date=$(date -u +'%Y%m%d')" >> "$GITHUB_OUTPUT"

      - name: Set up Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          # Quoted on purpose: a bare 1.90 is a YAML float and reads as 1.9.
          toolchain: "1.90"

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: "1.24"

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            make \
            build-essential \
            pkg-config

      - name: Set up golangci-lint
        uses: golangci/golangci-lint-action@v7
        with:
          version: v2.5.0
          install-mode: binary
          # NOTE(review): presumably --help makes this step install the binary
          # without linting the tree - confirm.
          args: --help

      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            candle-binding/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }}
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Cache Go dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/go/pkg/mod
          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
          restore-keys: |
            ${{ runner.os }}-go-

      - name: Setup model storage on /mnt
        run: |
          # Use /mnt for model storage (has ~75GB vs ~14GB on root)
          # This helps prevent "no space left on device" errors
          echo "Disk space before setup:"
          df -h / && df -h /mnt

          # Create /mnt/models directory if it doesn't exist
          sudo mkdir -p /mnt/models
          sudo chown -R "$USER:$USER" /mnt/models

          # If a real models directory already exists in the workspace, move
          # its contents to /mnt/models. Contents are copied rather than
          # running `mv models /mnt/models`: the target already exists, so mv
          # would nest the data as /mnt/models/models.
          if [ -d "models" ] && [ ! -L "models" ]; then
            echo "Moving existing models directory to /mnt/models..."
            if [ -n "$(ls -A /mnt/models 2>/dev/null)" ]; then
              echo "Warning: /mnt/models already has content, merging..."
            fi
            # Remove the workspace copy only after a successful copy so a
            # failed copy cannot lose the models.
            if cp -a models/. /mnt/models/; then
              rm -rf models
            else
              echo "::warning::Could not copy models to /mnt/models; keeping the workspace copy"
            fi
          fi

          # Create symlink from models/ to /mnt/models/ so existing code continues to work
          if [ ! -e "models" ]; then
            ln -s /mnt/models models
            echo "Created symlink: models -> /mnt/models"
          elif [ -L "models" ]; then
            echo "Symlink already exists: models -> $(readlink models)"
          else
            echo "Warning: models exists but is not a symlink"
          fi

          echo "Disk space after setup:"
          df -h / && df -h /mnt
          echo "Models directory setup complete. Models will be stored in /mnt/models"

      - name: Cache Models
        uses: actions/cache@v4
        with:
          path: |
            models/
          key: ${{ runner.os }}-models-v2-${{ hashFiles('tools/make/models.mk') }}
          restore-keys: |
            ${{ runner.os }}-models-v2-
        continue-on-error: true  # Don't fail the job if caching fails

      - name: Check go mod tidy
        run: make check-go-mod-tidy

      - name: Build Rust library (CPU-only, no CUDA)
        run: make rust-ci

      - name: Install HuggingFace CLI
        run: |
          pip install -U "huggingface_hub[cli]" hf_transfer

      - name: Start Milvus service
        run: |
          echo "Starting Milvus vector database..."
          docker run -d \
            --name milvus-semantic-cache \
            --security-opt seccomp:unconfined \
            -e ETCD_USE_EMBED=true \
            -e ETCD_DATA_DIR=/var/lib/milvus/etcd \
            -e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
            -e COMMON_STORAGETYPE=local \
            -e CLUSTER_ENABLED=false \
            -p 19530:19530 \
            -p 9091:9091 \
            milvusdb/milvus:v2.3.3 \
            milvus run standalone
          echo "Waiting for Milvus to be ready..."
          sleep 20
          # Poll the container log for the readiness line. A timeout stays
          # non-fatal, but it is reported instead of being announced as ready.
          if timeout 30 bash -c 'until docker logs milvus-semantic-cache 2>&1 | grep -q "Proxy successfully started"; do sleep 2; done'; then
            echo "Milvus is ready at localhost:19530"
          else
            echo "::warning::Milvus did not report readiness in time; Milvus-backed tests may fail"
            docker logs --tail 50 milvus-semantic-cache || true
          fi
          docker ps --filter "name=milvus-semantic-cache"

      - name: Start Redis service
        run: |
          echo "Starting Redis Stack..."
          make start-redis

      - name: Run semantic router tests
        run: make test
        env:
          # Environment variables are always strings; quoted to make that explicit.
          CI: "true"
          CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
          CGO_ENABLED: "1"
          LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release
          MILVUS_URI: localhost:19530
          SKIP_MILVUS_TESTS: "false"
          SKIP_REDIS_TESTS: "false"
          # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
          # For PRs from forks, this will be empty and gated models will gracefully skip
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}

      # Cleanup steps run even when earlier steps failed.
      - name: Clean Redis service
        if: always()
        run: |
          echo "Stopping Redis container and cleaning data..."
          make clean-redis

      - name: Stop Milvus service
        if: always()
        run: |
          echo "Stopping Milvus container..."
          docker stop milvus-semantic-cache || true
          docker rm milvus-semantic-cache || true
          echo "Milvus container cleaned up"

      - name: Upload test artifacts on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: test-logs
          path: |
            **/*.log
            **/test-output.*
          retention-days: 7

      - name: Notify on failure
        if: failure()
        run: |
          echo "::error::Test and build failed. Check the workflow run for details."

  # Trigger Docker publishing on successful nightly runs
  publish-docker:
    needs: test-and-build
    if: github.repository == 'vllm-project/semantic-router' && success() && github.event_name == 'schedule'
    uses: ./.github/workflows/docker-publish.yml
    with:
      # The date comes from a test-and-build output; a literal $(date ...)
      # here would be passed through unevaluated.
      tag_suffix: nightly-${{ needs.test-and-build.outputs.nightly_date }}
      is_nightly: true
    secrets: inherit