Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: update medium job to run as PR check #318

Merged
merged 1 commit into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,21 @@ pull_request_rules:
- -files~=^\.github/(actions|workflows)/.*\.ya?ml$
- -files~=^\.github/workflows/actionlint\.

# e2e medium workflow
# Satisfied when either:
#   (a) the PR touches a file that triggers the medium E2E workflow AND that
#       workflow's final 'e2e-medium-workflow-complete' check succeeded, or
#   (b) the PR touches none of those files.
# The two file lists must stay mirror images of each other.
- or:
    - and:
        - check-success~=e2e-medium-workflow-complete
        - or:
            - files~=\.py$
            - files=pyproject.toml
            # '~=' (regex match) is required here: the value is a pattern,
            # and '=' would compare it literally against each file name.
            - files~=^requirements.*\.txt$
            - files=.github/workflows/e2e-nvidia-a10g-x1.yml
    - and:
        - -files~=\.py$
        - -files=pyproject.toml
        - -files~=^requirements.*\.txt$
        - -files=.github/workflows/e2e-nvidia-a10g-x1.yml

# e2e small workflow
- or:
- and:
Expand Down
172 changes: 172 additions & 0 deletions .github/workflows/e2e-nvidia-a10g-x1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# SPDX-License-Identifier: Apache-2.0

name: E2E (NVIDIA A10G x1)

on:
  # Pull requests targeting 'main' or a release branch trigger a run only
  # when they modify at least one of the paths listed below.
  pull_request_target:
    branches:
      - main
      - release-*
    paths:
      # Keep this list in sync with the merge conditions in 'mergify.yml'.
      - '**.py'
      - 'pyproject.toml'
      - 'requirements**.txt'
      - '.github/workflows/e2e-nvidia-a10g-x1.yml'  # this workflow file
  # Every push to 'main' or a release branch also triggers a run.
  push:
    branches:
      - main
      - release-*

# Workflow-wide default for the job token: read-only repository contents.
permissions:
  contents: read

# Runs are grouped per workflow and PR number (falling back to the ref when
# the event carries no PR number); a newer run cancels the one in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.number || github.ref }}

defaults:
  run:
    shell: bash

env:
  LC_ALL: en_US.UTF-8

jobs:
start-medium-ec2-runner:
  runs-on: ubuntu-latest
  # Published so the test job can target the runner by label and the stop
  # job can reference the same label and instance id.
  outputs:
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    label: ${{ steps.start-ec2-runner.outputs.label }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
      with:
        aws-region: ${{ secrets.AWS_REGION }}
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    - id: start-ec2-runner
      name: Start EC2 runner
      uses: machulav/ec2-github-runner@fcfb31a5760dad1314a64a0e172b78ec6fc8a17e # v2.3.6
      with:
        mode: start
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        # Instance shape and placement.
        ec2-instance-type: g5.4xlarge
        ec2-image-id: ami-01a89eee1adde309c
        subnet-id: subnet-02d230cffd9385bd4
        security-group-id: sg-06300447c4a5fbef3
        iam-role-name: instructlab-ci-runner
        # JSON list of tags; records which repository, ref and PR number
        # the instance was started for.
        aws-resource-tags: >
          [
            {"Key": "Name", "Value": "instructlab-ci-github-medium-runner"},
            {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
            {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
            {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
          ]

e2e-medium-test:
  needs:
    - start-medium-ec2-runner
  # Runs on the runner whose label the start job published.
  runs-on: ${{ needs.start-medium-ec2-runner.outputs.label }}

  # It is important that this job has no write permissions and has
  # no access to any secrets. This part (e2e) is where we are running
  # untrusted code from PRs.
  permissions: {}

  steps:
    - name: Install Packages
      run: |
        cat /etc/os-release
        sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel

    - name: Checkout instructlab/instructlab
      uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
      with:
        repository: "instructlab/instructlab"
        path: "instructlab"
        # https://github.com/actions/checkout/issues/249
        fetch-depth: 0
        # Do not leave the workflow token in .git/config, where the
        # untrusted PR code executed by later steps could read it.
        persist-credentials: false

    - name: Checkout instructlab/sdg
      uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
      with:
        repository: "instructlab/sdg"
        path: "sdg"
        # https://github.com/actions/checkout/issues/249
        fetch-depth: 0
        # Same reasoning as above. NOTE(review): the PR fetch in the next
        # step then runs unauthenticated, which assumes this repository
        # is public - confirm.
        persist-credentials: false

    # Under 'pull_request_target' the checkout above does not contain the
    # PR's changes, so fetch the PR head into a local branch and switch to it.
    - name: Fetch and checkout PR
      if: ${{ github.event_name == 'pull_request_target' }}
      working-directory: ./sdg
      run: |
        git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-${{ github.event.pull_request.number }}
        git checkout pr-${{ github.event.pull_request.number }}

    - name: Install ilab
      working-directory: ./instructlab
      run: |
        export CUDA_HOME="/usr/local/cuda"
        export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
        export PATH="$PATH:$CUDA_HOME/bin"
        python3.11 -m venv --upgrade-deps venv
        . venv/bin/activate
        nvidia-smi
        python3.11 -m pip cache remove llama_cpp_python

        CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install .

        # https://github.com/instructlab/instructlab/issues/1821
        # install with Torch and build dependencies installed
        python3.11 -m pip install packaging wheel setuptools-scm
        # Quoted so the shell never treats '[cuda]' as a glob pattern.
        python3.11 -m pip install '.[cuda]' -r requirements-vllm-cuda.txt

    # Install the checked-out sdg tree into the venv created by the
    # previous step.
    - name: Update instructlab-sdg library
      working-directory: ./sdg
      run: |
        . ../instructlab/venv/bin/activate
        pip install .

    - name: Check disk
      run: |
        df -h

    - name: Run e2e test
      working-directory: ./instructlab
      run: |
        . venv/bin/activate
        ./scripts/e2e-ci.sh -m

stop-medium-ec2-runner:
  # Runs regardless of how the jobs it waits on finished.
  if: always()
  needs: [start-medium-ec2-runner, e2e-medium-test]
  runs-on: ubuntu-latest
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
      with:
        aws-region: ${{ secrets.AWS_REGION }}
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    - name: Stop EC2 runner
      uses: machulav/ec2-github-runner@fcfb31a5760dad1314a64a0e172b78ec6fc8a17e # v2.3.6
      with:
        mode: stop
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        # Both values are the outputs published by the start job.
        ec2-instance-id: ${{ needs.start-medium-ec2-runner.outputs.ec2-instance-id }}
        label: ${{ needs.start-medium-ec2-runner.outputs.label }}

e2e-medium-workflow-complete:
  # Deliberately does not wait on 'stop-medium-ec2-runner': a failed EC2
  # cleanup should not block PRs.
  needs:
    - start-medium-ec2-runner
    - e2e-medium-test
  runs-on: ubuntu-latest
  steps:
    - name: E2E Workflow Complete
      run: echo "E2E Workflow Complete"
181 changes: 0 additions & 181 deletions .github/workflows/e2e-nvidia-a10g-x4.yml

This file was deleted.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
![License](https://img.shields.io/github/license/instructlab/sdg)

![`e2e-nvidia-t4-x1.yml` on `main`](https://github.com/instructlab/sdg/actions/workflows/e2e-nvidia-t4-x1.yml/badge.svg?branch=main)
![`e2e-nvidia-a10g-x1.yml` on `main`](https://github.com/instructlab/sdg/actions/workflows/e2e-nvidia-a10g-x1.yml/badge.svg?branch=main)

Python library for Synthetic Data Generation