# Update tuning status (#1902) #4729
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow display name.
name: vLLM Benchmark

# Triggers: pushes to main, pull requests targeting main, and manual runs.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:
# Runs are grouped per workflow + ref. A newer run cancels an in-flight run of
# the same group on every ref except refs/heads/main.
concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
# Workflow-wide environment, readable in every job as ${{ env.NAME }}.
env:
  # NOTE(review): VLLM_BRANCH and VLLM_REPOSITORY_URL are not referenced by the
  # jobs in this file; confirm whether a script still consumes them.
  VLLM_BRANCH: "main"
  VLLM_REPOSITORY_URL: "https://github.com/vllm-project/vllm"
  # Base image for the CI image build, pinned by digest.
  BASE_IMAGE: rocm/vllm-dev:nightly@sha256:7ac43a3fcb1a9f46fd7575701a587154ad478cc6cf2c19d170f8378ecdb540d9
  # Repository to clone inside the image: the PR head repository when the run
  # comes from a pull request, otherwise the upstream aiter repository.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
  # Commit to build and benchmark; it is also used as the CI image tag.
  # Resolution order: PR head commit, pushed head commit, then github.sha.
  # The last fallback covers workflow_dispatch, whose event payload carries
  # neither pull_request nor head_commit and would otherwise yield an empty
  # tag and an empty `git checkout` argument.
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
jobs:
  # Gate job: runs the repository's signal-check script. The build job below
  # depends on it.
  check-signal:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Download and check signal artifact
        run: ./.github/scripts/check_signal.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_SHA: ${{ github.sha }}

  # Builds rocm/aiter-ci:<commit> on top of BASE_IMAGE with aiter installed
  # from the commit under test, then pushes it for the benchmark job.
  # Skipped for pull requests coming from forks.
  build_vllm_image:
    if: ${{ !github.event.pull_request.head.repo.fork }}
    needs: [check-signal]
    runs-on: aiter-k8s-build
    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4
      - name: Sync submodules
        run: |
          set -e
          git submodule sync
          git submodule update --init --recursive --depth 1 --jobs 4
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        # The password is fed through stdin so it never appears in the
        # process argument list or in the rendered script.
        run: printf '%s' "$DOCKER_PASSWORD" | docker login -u rocmshared --password-stdin
      - name: Download the vLLM base image
        run: |
          docker pull ${{ env.BASE_IMAGE }}
      - name: Generate Dockerfile
        # The heredoc delimiter is quoted so the shell copies the text
        # verbatim: every line continuation is a single backslash and nothing
        # is expanded by the shell. The GitHub expressions are substituted
        # before the script runs.
        run: |
          cat <<'EOF' > Dockerfile.mod
          FROM ${{ env.BASE_IMAGE }}
          RUN echo "=== Aiter version BEFORE uninstall ===" && pip show aiter amd-aiter || true
          # The package is referred to as both "aiter" and "amd-aiter" in this
          # workflow, so remove whichever name the base image ships; pip skips
          # names that are not installed.
          RUN pip uninstall -y aiter amd-aiter
          RUN pip config set global.default-timeout 60 \
              && pip config set global.retries 10
          RUN pip config set global.index-url https://ausartifactory.amd.com/artifactory/api/pypi/hw-cpe-prod-remote/simple
          RUN pip install --upgrade "pybind11>=3.0.1"
          RUN pip show pybind11
          RUN git clone ${{ env.GITHUB_REPO_URL }} /aiter && \
              cd /aiter && \
              git checkout ${{ env.GITHUB_COMMIT_SHA }} && \
              git submodule sync && git submodule update --init --recursive && \
              python3 setup.py develop
          RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true
          ENTRYPOINT [""]
          EOF
      - name: Show Dockerfile
        run: cat Dockerfile.mod
      - name: Build Docker image
        run: |
          IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
          docker build --network=host --no-cache -t "$IMAGE_TAG" -f Dockerfile.mod .
      - name: Push Docker image
        run: |
          IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
          docker push "$IMAGE_TAG"
      - name: Success message
        run: |
          echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"

  # Runs `vllm bench latency` inside the freshly built image, once per
  # (model, kv_cache_dtype) combination of the matrix.
  vllm_benchmark:
    if: ${{ !github.event.pull_request.head.repo.fork }}
    runs-on: aiter-8gpu-runner
    needs: build_vllm_image
    strategy:
      fail-fast: false
      matrix:
        model:
          - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
          - 'deepseek-ai/DeepSeek-R1'
        kv_cache_dtype:
          - 'default_kvcache'
          - 'fp8_kvcache'
        # DeepSeek-R1 is only benchmarked with the default KV cache.
        exclude:
          - model: 'deepseek-ai/DeepSeek-R1'
            kv_cache_dtype: 'fp8_kvcache'
    env:
      # Explicit container name, unique per run, attempt and matrix entry, so
      # the clean-up step can address the container started by the benchmark.
      CONTAINER_NAME: aiter-bench-${{ github.run_id }}-${{ github.run_attempt }}-${{ strategy.job-index }}
    steps:
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        # The password is fed through stdin so it never appears in the
        # process argument list or in the rendered script.
        run: printf '%s' "$DOCKER_PASSWORD" | docker login -u rocmshared --password-stdin
      - name: Download the vLLM image
        run: |
          docker pull rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
      - name: Run benchmarks
        env:
          # Forwarded into the container with `-e HF_TOKEN` below, which keeps
          # the token out of the command line traced by `set -x`.
          HF_TOKEN: ${{ secrets.HF_TOKEN_TEST }}
        run: |
          set -x -o pipefail
          echo "Starting benchmark for model: ${{ matrix.model }} with kv_cache_dtype: ${{ matrix.kv_cache_dtype }}"
          # Log file name: the model id with "/" replaced by "_", plus the KV cache variant.
          logFile="result_$(echo '${{ matrix.model }}' | sed 's/\//_/g')_kv_${{ matrix.kv_cache_dtype }}.log"
          if [[ "${{ matrix.model }}" == *DeepSeek* ]]; then
            extraArgs="--block-size 1"
          else
            extraArgs=""
          fi
          if [[ "${{ matrix.kv_cache_dtype }}" == "fp8_kvcache" ]]; then
            extraArgs="${extraArgs} --kv-cache-dtype fp8"
          fi
          # ${extraArgs} is intentionally unquoted so it splits into separate arguments.
          docker run --rm --name "$CONTAINER_NAME" \
            --device=/dev/kfd --device=/dev/dri --group-add video \
            --ulimit core=0:0 --ulimit memlock=-1:-1 --ulimit stack=67108864 --cap-add=SYS_PTRACE \
            --network=host --security-opt seccomp=unconfined --shm-size=16G \
            -e HF_TOKEN -e VLLM_ROCM_USE_AITER=1 \
            rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} python -m vllm.entrypoints.cli.main bench latency \
            --model "${{ matrix.model }}" \
            --batch-size 123 --input-len 456 --output-len 78 \
            --num-iters-warmup 3 --num-iters 10 \
            -tp 8 --load-format dummy ${extraArgs} |& tee "${logFile}"
          # With pipefail set, the step fails when no "Avg latency:" line was logged.
          grep "Avg latency:" "${logFile}" | awk '{print $3}'
      - name: Clean up
        if: always()
        # Best effort: the container is normally gone already because of
        # --rm, so failures here are ignored.
        run: |
          docker stop "$CONTAINER_NAME" || true
          docker rm -f "$CONTAINER_NAME" || true
          docker rmi rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} || true

  # Counterpart of the fork guards above: runs only for fork pull requests
  # and records why the build and benchmark jobs were skipped.
  skip-job:
    if: ${{ github.event.pull_request.head.repo.fork }}
    runs-on: ubuntu-latest
    steps:
      - name: Skip reason
        run: echo "It's a fork repository, skipping tests."