[WIP] Add PR-based public beta system for GPU instances #3032

Workflow file for this run

.github/workflows/test-and-build.yml at 383c805

	# CI entry point: runs the test-and-build pipeline for main, for pull
	# requests targeting main, on a nightly schedule, and on manual dispatch.
	name: Test And Build

	on:
	  push:
	    branches:
	      - main
	  pull_request:
	    branches:
	      - main
	    # `ready_for_review` is listed so a run starts when a draft PR is
	    # marked ready.
	    types:
	      - opened
	      - synchronize
	      - reopened
	      - ready_for_review
	  schedule:
	    # Run nightly at 2:00 AM UTC
	    - cron: "0 2 * * *"
	  # Allow manual triggering
	  workflow_dispatch:

	# One concurrency group per workflow + ref; a newer run in the same
	# group cancels the one still in progress.
	concurrency:
	  cancel-in-progress: true
	  group: ${{ github.workflow }}-${{ github.ref }}

	jobs:
	  # use changes as filter
	  changes:
	    uses: ./.github/workflows/ci-changes.yml

	  # Builds the Rust library, starts Milvus and Redis, and runs `make test`.
	  test-and-build:
	    needs: changes
	    # Never run for draft PRs. Otherwise run on the nightly schedule, or
	    # when the `changes` job reports one of the listed outputs as 'true'.
	    # NOTE(review): manual `workflow_dispatch` runs are gated on the same
	    # change-filter outputs - confirm that is intended.
	    if: >-
	      ${{ !github.event.pull_request.draft
	      && (github.event_name == 'schedule'
	      || needs.changes.outputs.core == 'true'
	      || needs.changes.outputs.helm == 'true'
	      || needs.changes.outputs.e2e == 'true'
	      || needs.changes.outputs.docker == 'true'
	      || needs.changes.outputs.make == 'true'
	      || needs.changes.outputs.ci == 'true') }}
	    runs-on: ubuntu-latest
	    steps:
	      - name: Check out the repo
	        uses: actions/checkout@v4

	      - name: Set up Rust
	        uses: dtolnay/rust-toolchain@stable
	        with:
	          # Quoted on purpose: an unquoted 1.90 is a YAML float and would
	          # be read as 1.9, selecting a different toolchain.
	          toolchain: "1.90"

	      - name: Set up Go
	        uses: actions/setup-go@v5
	        with:
	          go-version: "1.24"

	      - name: Install system dependencies
	        run: |
	          sudo apt-get update
	          sudo apt-get install -y \
	            make \
	            build-essential \
	            pkg-config

	      - name: Set up golangci-lint
	        uses: golangci/golangci-lint-action@v7
	        with:
	          version: v2.5.0
	          install-mode: binary
	          # NOTE(review): `--help` replaces a real lint invocation here,
	          # presumably so this step only installs the pinned binary -
	          # confirm.
	          args: --help

	      - name: Cache Rust dependencies
	        uses: actions/cache@v4
	        with:
	          path: |
	            ~/.cargo/bin/
	            ~/.cargo/registry/index/
	            ~/.cargo/registry/cache/
	            ~/.cargo/git/db/
	            candle-binding/target/
	          # `**/` globs (same form as the Go cache key below) so manifests
	          # in subdirectories contribute to the key.
	          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }}
	          restore-keys: |
	            ${{ runner.os }}-cargo-

	      - name: Cache Go dependencies
	        uses: actions/cache@v4
	        with:
	          path: |
	            ~/go/pkg/mod
	          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
	          restore-keys: |
	            ${{ runner.os }}-go-

	      - name: Setup model storage on /mnt
	        run: |
	          # Use /mnt for model storage (has ~75GB vs ~14GB on root)
	          # This helps prevent "no space left on device" errors
	          echo "Disk space before setup:"
	          df -h / && df -h /mnt

	          # Create /mnt/models directory if it doesn't exist
	          sudo mkdir -p /mnt/models
	          sudo chown -R "$USER:$USER" /mnt/models

	          # If models directory already exists in workspace, move it to /mnt
	          if [ -d "models" ] && [ ! -L "models" ]; then
	            echo "Moving existing models directory to /mnt/models..."
	            if [ -n "$(ls -A /mnt/models 2>/dev/null)" ]; then
	              echo "Warning: /mnt/models already has content, merging..."
	            fi
	            # /mnt/models always exists here (created above), so
	            # `mv models /mnt/models` would nest the data at
	            # /mnt/models/models. Copy the contents instead (dotfiles
	            # included) and delete the source only after the copy
	            # succeeded.
	            cp -a models/. /mnt/models/
	            rm -rf models
	          fi

	          # Create symlink from models/ to /mnt/models/ so existing code continues to work
	          if [ ! -e "models" ]; then
	            ln -s /mnt/models models
	            echo "Created symlink: models -> /mnt/models"
	          elif [ -L "models" ]; then
	            echo "Symlink already exists: models -> $(readlink models)"
	          else
	            echo "Warning: models exists but is not a symlink"
	          fi

	          echo "Disk space after setup:"
	          df -h / && df -h /mnt
	          echo "Models directory setup complete. Models will be stored in /mnt/models"

	      - name: Cache Models
	        uses: actions/cache@v4
	        with:
	          path: |
	            models/
	          key: ${{ runner.os }}-models-v2-${{ hashFiles('tools/make/models.mk') }}
	          restore-keys: |
	            ${{ runner.os }}-models-v2-
	        continue-on-error: true  # Don't fail the job if caching fails

	      - name: Check go mod tidy
	        run: make check-go-mod-tidy

	      - name: Build Rust library (CPU-only, no CUDA)
	        run: make rust-ci

	      - name: Install HuggingFace CLI
	        run: |
	          pip install -U "huggingface_hub[cli]" hf_transfer

	      - name: Start Milvus service
	        run: |
	          echo "Starting Milvus vector database..."
	          docker run -d \
	            --name milvus-semantic-cache \
	            --security-opt seccomp:unconfined \
	            -e ETCD_USE_EMBED=true \
	            -e ETCD_DATA_DIR=/var/lib/milvus/etcd \
	            -e ETCD_CONFIG_PATH=/milvus/configs/advanced/etcd.yaml \
	            -e COMMON_STORAGETYPE=local \
	            -e CLUSTER_ENABLED=false \
	            -p 19530:19530 \
	            -p 9091:9091 \
	            milvusdb/milvus:v2.3.3 \
	            milvus run standalone

	          echo "Waiting for Milvus to be ready..."
	          sleep 20

	          # Verify Milvus is responsive. Still best-effort (a timeout does
	          # not fail the step), but a timeout is now reported instead of
	          # being followed by an unconditional "ready" message.
	          if timeout 30 bash -c 'until docker logs milvus-semantic-cache 2>&1 | grep -q "Proxy successfully started"; do sleep 2; done'; then
	            echo "Milvus is ready at localhost:19530"
	          else
	            echo "::warning::Milvus did not report readiness within 30s; continuing anyway"
	            docker logs --tail 50 milvus-semantic-cache || true
	          fi

	          docker ps --filter "name=milvus-semantic-cache"

	      - name: Start Redis service
	        run: |
	          echo "Starting Redis Stack..."
	          make start-redis

	      - name: Run semantic router tests
	        run: make test
	        env:
	          # Values are quoted so they stay strings rather than YAML
	          # booleans/integers.
	          CI: "true"
	          CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
	          CGO_ENABLED: "1"
	          LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release
	          MILVUS_URI: "localhost:19530"
	          SKIP_MILVUS_TESTS: "false"
	          SKIP_REDIS_TESTS: "false"
	          # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
	          # For PRs from forks, this will be empty and gated models will gracefully skip
	          HF_TOKEN: ${{ secrets.HF_TOKEN }}
	          HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}

	      - name: Clean Redis service
	        if: always()
	        run: |
	          echo "Stopping Redis container and cleaning data..."
	          make clean-redis

	      - name: Stop Milvus service
	        if: always()
	        run: |
	          echo "Stopping Milvus container..."
	          docker stop milvus-semantic-cache || true
	          docker rm milvus-semantic-cache || true
	          echo "Milvus container cleaned up"

	      - name: Upload test artifacts on failure
	        if: failure()
	        uses: actions/upload-artifact@v4
	        with:
	          name: test-logs
	          # Recursive globs: any *.log and any test-output.* file under
	          # the workspace.
	          path: |
	            **/*.log
	            **/test-output.*
	          retention-days: 7

	      - name: Notify on failure
	        if: failure()
	        run: |
	          echo "::error::Test and build failed. Check the workflow run for details."

	  # Trigger Docker publishing on successful nightly runs
	  publish-docker:
	    needs: test-and-build
	    if: github.repository == 'vllm-project/semantic-router' && success() && github.event_name == 'schedule'
	    uses: ./.github/workflows/docker-publish.yml
	    with:
	      # NOTE(review): `with:` values are not run through a shell, so the
	      # called workflow receives the literal text `$(date +'%Y%m%d')`.
	      # Confirm docker-publish.yml expands it (or compute the date in a
	      # job output and pass that instead).
	      tag_suffix: nightly-$(date +'%Y%m%d')
	      is_nightly: true
	    secrets: inherit

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[WIP] Add PR-based public beta system for GPU instances #3032

Workflow file

[WIP] Add PR-based public beta system for GPU instances #3032

Uh oh!

Workflow file for this run