Merge pull request #697 from instructlab/dependabot/github_actions/py… #375

Workflow file for this run

.github/workflows/functional-gpu-nvidia-t4-x1.yml at e10cad8

	# SPDX-License-Identifier: Apache-2.0

	name: 'Functional GPU (NVIDIA Tesla T4 x1)'

	on:
	  # manual trigger
	  workflow_dispatch: {}
	  # every push (i.e. merge commit) to 'main' and to release branches
	  push:
	    branches:
	      - main
	      - release-*
	  # PRs targeting 'main' or a release branch, but only when they touch
	  # one of the glob patterns listed under 'paths'
	  pull_request_target:
	    branches:
	      - main
	      - release-*
	    paths:
	      # keep this list in sync with the merging criteria in 'mergify.yml'
	      - '**.py'
	      - 'pyproject.toml'
	      - 'requirements**.txt'
	      - 'constraints-dev.txt'
	      - 'tox.ini'
	      - '.github/workflows/functional-gpu-nvidia-t4-x1.yml' # This workflow

	# One active run per workflow and PR number; when the event carries no PR
	# number the group falls back to the git ref. A newer run in the same
	# group cancels the one still in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
	  cancel-in-progress: true

	# Workflow-level environment: locale shared by all jobs.
	env:
	  LC_ALL: en_US.UTF-8

	# Every 'run' step uses bash unless the step sets its own shell.
	defaults:
	  run:
	    shell: bash

	# Workflow-level token scope: read-only access to repository contents.
	permissions:
	  contents: read

	jobs:
	  # Launches the EC2 instance that hosts the GPU test job and publishes its
	  # runner label, instance id and region for the downstream jobs.
	  start-small-ec2-runner:
	    runs-on: ubuntu-latest
	    outputs:
	      label: ${{ steps.launch-ec2-instance-with-fallback.outputs.label }}
	      ec2-instance-id: ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-id }}
	      ec2-instance-region: ${{ steps.launch-ec2-instance-with-fallback.outputs.ec2-instance-region }}
	    steps:
	      - name: Checkout "launch-ec2-runner-with-fallback" in-house CI action
	        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
	        with:
	          repository: instructlab/ci-actions
	          # clone the "ci-actions" repo to a local directory called "ci-actions", instead of
	          # overwriting the current WORKDIR contents
	          path: ci-actions
	          ref: release-v0.1
	          # only the directory of the action used below is checked out
	          sparse-checkout: |
	            actions/launch-ec2-runner-with-fallback

	      - name: Launch EC2 Runner with Fallback
	        id: launch-ec2-instance-with-fallback
	        uses: ./ci-actions/actions/launch-ec2-runner-with-fallback
	        env:
	          TMPDIR: "/tmp"
	        with:
	          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	          github_token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          # JSON list of candidate regions, each with its subnets, AMI and
	          # security group
	          regions_config: >
	            [
	              {
	                "region": "us-east-2",
	                "subnets": {
	                  "us-east-2a": "${{ vars.SUBNET_US_EAST_2A }}",
	                  "us-east-2b": "${{ vars.SUBNET_US_EAST_2B }}",
	                  "us-east-2c": "${{ vars.SUBNET_US_EAST_2C }}"
	                },
	                "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_2 }}",
	                "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_2 }}"
	              },
	              {
	                "region": "us-east-1",
	                "subnets": {
	                  "us-east-1a": "${{ vars.SUBNET_US_EAST_1A }}",
	                  "us-east-1b": "${{ vars.SUBNET_US_EAST_1B }}",
	                  "us-east-1c": "${{ vars.SUBNET_US_EAST_1C }}",
	                  "us-east-1d": "${{ vars.SUBNET_US_EAST_1D }}",
	                  "us-east-1e": "${{ vars.SUBNET_US_EAST_1E }}",
	                  "us-east-1f": "${{ vars.SUBNET_US_EAST_1F }}"
	                },
	                "ec2-ami": "${{ vars.AWS_EC2_AMI_US_EAST_1 }}",
	                "security-group-id": "${{ vars.SECURITY_GROUP_ID_US_EAST_1 }}"
	              }
	            ]
	          try_spot_instance_first: false
	          ec2_instance_type: g4dn.2xlarge
	          aws_resource_tags: >
	            [
	              {"Key": "Name", "Value": "instructlab-ci-github-small-runner"},
	              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
	              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
	              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
	            ]

	  # Runs the functional GPU test suite on the runner started above.
	  functional-gpu-small-test:
	    needs:
	      - start-small-ec2-runner
	    runs-on: ${{ needs.start-small-ec2-runner.outputs.label }}

	    # It is important that this job has no write permissions and has
	    # no access to any secrets. This part is where we are running
	    # untrusted code from PRs.
	    permissions: {}

	    steps:
	      - name: Install Packages
	        run: |
	          cat /etc/os-release
	          sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel

	      - name: Checkout instructlab/sdg
	        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
	        with:
	          # https://github.com/actions/checkout/issues/249
	          fetch-depth: 0

	      # pull_request_target checks out the base branch, so the PR's merge
	      # ref is fetched explicitly to test the proposed change
	      - name: Fetch and checkout PR
	        if: github.event_name == 'pull_request_target'
	        run: |
	          git fetch origin pull/${{ github.event.pull_request.number }}/merge:pr-merge-${{ github.event.pull_request.number }}
	          git checkout pr-merge-${{ github.event.pull_request.number }}
	          git log -1 --format="%H %s"

	      - name: Install instructlab/sdg
	        run: |
	          export PATH="/home/ec2-user/.local/bin:/usr/local/cuda/bin:$PATH"
	          python3.11 -m venv --upgrade-deps venv
	          . venv/bin/activate
	          nvidia-smi
	          # The requirement must be quoted: unquoted, bash parses '>=1.2' as a
	          # redirection to a file named '=1.2' and the version bound is lost.
	          python3.11 -m pip install tox "tox-gh>=1.2" -c constraints-dev.txt
	          python3.11 -m pip cache remove llama_cpp_python

	          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -r requirements-dev.txt -c constraints-dev.txt

	      - name: Check disk before tests
	        run: |
	          df -h
	          df -i

	      - name: Run functional gpu tests with tox
	        run: |
	          . venv/bin/activate
	          tox -e functional-gpu

	      # runs even when an earlier step failed
	      - name: Check disk after tests
	        if: ${{ always() }}
	        run: |
	          df -h
	          df -i

	  # Stops the EC2 runner; 'always()' makes this run even when the test job
	  # failed or was cancelled.
	  stop-small-ec2-runner:
	    needs:
	      - start-small-ec2-runner
	      - functional-gpu-small-test
	    runs-on: ubuntu-latest
	    if: ${{ always() }}
	    steps:
	      - name: Configure AWS credentials
	        uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a # v4.3.1
	        with:
	          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	          aws-region: ${{ needs.start-small-ec2-runner.outputs.ec2-instance-region }}

	      - name: Stop EC2 runner
	        uses: machulav/ec2-github-runner@fb91019e71385fb10dfcbec812b4de8c61589f7b # v2.4.1
	        with:
	          mode: stop
	          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          label: ${{ needs.start-small-ec2-runner.outputs.label }}
	          ec2-instance-id: ${{ needs.start-small-ec2-runner.outputs.ec2-instance-id }}

	  # Single job that reports the overall result of launch + tests.
	  functional-gpu-small-workflow-complete:
	    # we don't want to block PRs on failed EC2 cleanup
	    # so not requiring "stop-small-ec2-runner" as well
	    needs: ["start-small-ec2-runner", "functional-gpu-small-test"]
	    runs-on: ubuntu-latest
	    steps:
	      - name: Functional GPU Workflow Complete
	        run: echo "Functional GPU Workflow Complete"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Merge pull request #697 from instructlab/dependabot/github_actions/py… #375

Workflow file

Merge pull request #697 from instructlab/dependabot/github_actions/py… #375

Uh oh!

Jobs

Run details

Workflow file for this run