Skip to content

Commit

Permalink
ci: update small E2E job to align with CLI and Training
Browse files (browse the repository at this point in the history)
Updates include the new AMI and the new runner script.

Signed-off-by: Nathan Weinberg <[email protected]>
  • Loading branch information
nathan-weinberg committed Oct 21, 2024
1 parent 067e4c1 commit c6350b9
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 49 deletions.
4 changes: 2 additions & 2 deletions .github/mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ pull_request_rules:
- -files~=^\.github/(actions|workflows)/.*\.ya?ml$
- -files~=^\.github/workflows/actionlint\.

# e2e workflow
# e2e small workflow
- or:
- and:
- check-success=e2e-workflow-complete
- check-success=e2e-small-workflow-complete
- or:
- files~=\.py$
- files=pyproject.toml
Expand Down
85 changes: 38 additions & 47 deletions .github/workflows/e2e-nvidia-t4-x1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,18 @@
name: E2E (NVIDIA Tesla T4 x1)

on:
# run against every merge commit to 'main' and release branches
push:
branches:
- main
- release-*
paths:
- "**.py"
- "pyproject.toml"
- "requirements**.txt"
- ".github/workflows/e2e-nvidia-t4-x1.yml" # This workflow
# only run on PRs that touch certain regex paths
pull_request_target:
types:
- opened
- synchronize
- reopened
branches:
- main
- release-*
paths:
# note this should match the merging criteria in 'mergify.yml'
- "**.py"
- "pyproject.toml"
- "requirements**.txt"
Expand All @@ -30,9 +24,18 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

env:
LC_ALL: en_US.UTF-8

defaults:
run:
shell: bash

permissions:
contents: read

jobs:
start-runner:
name: Start external EC2 runner
start-small-ec2-runner:
runs-on: ubuntu-latest
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
Expand All @@ -44,13 +47,14 @@ jobs:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}

- name: Start EC2 runner
id: start-ec2-runner
uses: machulav/ec2-github-runner@fcfb31a5760dad1314a64a0e172b78ec6fc8a17e # v2.3.6
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ami-00c51d9c1374eda97
ec2-image-id: ami-01a89eee1adde309c
ec2-instance-type: g4dn.2xlarge
subnet-id: subnet-02d230cffd9385bd4
security-group-id: sg-06300447c4a5fbef3
Expand All @@ -63,30 +67,28 @@ jobs:
{"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
]
e2e:
name: E2E Test
needs: start-runner
runs-on: ${{ needs.start-runner.outputs.label }}
e2e-small-test:
needs:
- start-small-ec2-runner
runs-on: ${{ needs.start-small-ec2-runner.outputs.label }}

# It is important that this job has no write permissions and no
# access to any secrets: this job (e2e) is where untrusted code
# from PRs is run.
permissions: {}

# No step-security/harden-runner since this is a self-hosted runner
steps:
# for debugging
- name: Print environment state
- name: Install Packages
run: |
echo "Current Working Directory: $PWD"
echo "Files in Local Directory:"
ls -l
cat /etc/os-release
sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel
- name: Checkout instructlab/instructlab
uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
with:
repository: "instructlab/instructlab"
path: "instructlab"
# https://github.com/actions/checkout/issues/249
fetch-depth: 0

- name: Checkout instructlab/sdg
Expand All @@ -97,27 +99,14 @@ jobs:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0

# for debugging
- name: Print environment state
run: |
echo "Current Working Directory: $PWD"
echo "Files in Local Directory:"
ls -l
- name: Fetch and checkout PR
id: fetch_pr
if: github.event_name == 'pull_request_target'
working-directory: ./sdg
run: |
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-${{ github.event.pull_request.number }}
git checkout pr-${{ github.event.pull_request.number }}
- name: Install system packages
run: |
cat /etc/os-release
sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel
- name: Install instructlab
- name: Install ilab
working-directory: ./instructlab
run: |
export PATH="/home/ec2-user/.local/bin:/usr/local/cuda/bin:$PATH"
Expand All @@ -138,19 +127,21 @@ jobs:
run: |
. ../instructlab/venv/bin/activate
pip install .
pip install .[cuda]
- name: Check disk
run: |
df -h
- name: Run e2e test
working-directory: ./instructlab
run: |
. venv/bin/activate
./scripts/e2e-custom.sh -msq
./scripts/e2e-ci.sh -s
stop-runner:
name: Stop external EC2 runner
stop-small-ec2-runner:
needs:
- start-runner
- e2e
- start-small-ec2-runner
- e2e-small-test
runs-on: ubuntu-latest
if: ${{ always() }}
steps:
Expand All @@ -165,13 +156,13 @@ jobs:
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
label: ${{ needs.start-small-ec2-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-small-ec2-runner.outputs.ec2-instance-id }}

e2e-workflow-complete:
e2e-small-workflow-complete:
# we don't want to block PRs on failed EC2 cleanup
# so not requiring "stop-runner" as well
needs: ["start-runner", "e2e"]
# so not requiring "stop-small-ec2-runner" as well
needs: ["start-small-ec2-runner", "e2e-small-test"]
runs-on: ubuntu-latest
steps:
- name: E2E Workflow Complete
Expand Down

0 comments on commit c6350b9

Please sign in to comment.