Skip to content

Introduce a way to mix generated datasets before sending to training #398

Introduce a way to mix generated datasets before sending to training

Introduce a way to mix generated datasets before sending to training #398

Workflow file for this run

# SPDX-License-Identifier: Apache-2.0
name: E2E test
on:
push:
branches:
- "main"
- "release-**"
paths:
- '**.py'
- 'src/**.yaml'
- 'pyproject.toml'
- 'requirements*.txt'
- '.github/workflows/e2e.yml'
pull_request:
branches:
- "main"
- "release-**"
paths:
- '**.py'
- 'src/**.yaml'
- 'pyproject.toml'
- 'requirements*.txt'
- '.github/workflows/e2e.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
e2e:
runs-on: ubuntu-gpu
permissions:
pull-requests: write
steps:
# No step-security/harden-runner since this is a self-hosted runner
- name: Checkout instructlab/sdg
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
# https://github.com/actions/checkout/issues/249
fetch-depth: 0
- name: Checkout instructlab/instructlab
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: "instructlab/instructlab"
path: "instructlab"
fetch-depth: 0
- name: Install Packages
run: |
sudo apt-get install -y cuda-toolkit git cmake build-essential virtualenv
nvidia-smi
sudo ls -l /dev/nvidia*
- name: Setup Python 3.11
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
python-version: 3.11
cache: pip
cache-dependency-path: |
**/pyproject.toml
**/requirements*.txt
- name: Remove llama-cpp-python from cache
run: |
pip cache remove llama_cpp_python
- name: Cache huggingface
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: ~/.cache/huggingface
# config contains DEFAULT_MODEL
key: huggingface-${{ hashFiles('src/instructlab/configuration.py') }}
- name: Install instructlab and instructlab-sdg
run: |
export PATH="/home/runner/.local/bin:/usr/local/cuda/bin:$PATH"
python3 -m venv venv
. venv/bin/activate
cd instructlab
sed 's/\[.*\]//' requirements.txt > constraints.txt
python3 -m pip cache remove llama_cpp_python
CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --no-binary llama_cpp_python -c constraints.txt llama_cpp_python
# needed for --4-bit-quant option to ilab train
python3 -m pip install bitsandbytes
# install instructlab
python3 -m pip install .
cd ..
# Install instructlab-sdg
python3 -m pip install .
- name: Run e2e test
run: |
. venv/bin/activate
./instructlab/scripts/basic-workflow-tests.sh -cm
- name: Remove llama-cpp-python from cache
if: always()
run: |
pip cache remove llama_cpp_python