# Introduce a way to mix generated datasets before sending to training #398
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# SPDX-License-Identifier: Apache-2.0

# Display name of the workflow.
name: E2E test
# Run on pushes and pull requests that target "main" or a "release-**" branch,
# but only when at least one changed file matches the path filters below.
# The push and pull_request filters are intentionally identical.
on:
  push:
    branches:
      - "main"
      - "release-**"
    paths:
      - '**.py'
      - 'src/**.yaml'
      - 'pyproject.toml'
      - 'requirements*.txt'
      - '.github/workflows/e2e.yml'
  pull_request:
    branches:
      - "main"
      - "release-**"
    paths:
      - '**.py'
      - 'src/**.yaml'
      - 'pyproject.toml'
      - 'requirements*.txt'
      - '.github/workflows/e2e.yml'
# One concurrency group per workflow and pull request; when the event has no
# pull request number the expression falls back to the git ref. A newer run in
# the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Single end-to-end job: installs instructlab plus this repository's package
  # into one virtualenv with a CUDA build of llama_cpp_python, then runs the
  # instructlab basic workflow test script.
  e2e:
    runs-on: ubuntu-gpu
    permissions:
      pull-requests: write
    steps:
      # No step-security/harden-runner since this is a self-hosted runner
      # No `repository` input is given, so this checks out the repository the
      # workflow is running in, into the workspace root.
      - name: Checkout instructlab/sdg
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332  # v4.1.7
        with:
          # https://github.com/actions/checkout/issues/249
          fetch-depth: 0

      # Second checkout, placed in the ./instructlab sub-directory of the
      # workspace so it sits next to the files checked out above.
      - name: Checkout instructlab/instructlab
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332  # v4.1.7
        with:
          repository: "instructlab/instructlab"
          path: "instructlab"
          fetch-depth: 0

      # Installs the build toolchain and CUDA toolkit, then lists the GPU and
      # its device nodes so that information is visible in the job log.
      - name: Install Packages
        run: |
          sudo apt-get install -y cuda-toolkit git cmake build-essential virtualenv
          nvidia-smi
          sudo ls -l /dev/nvidia*

      - name: Setup Python 3.11
        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d  # v5.1.0
        with:
          # Quoted so YAML keeps the version a string instead of parsing it as
          # a float (an unquoted 3.10 would silently become 3.1).
          python-version: "3.11"
          cache: pip
          cache-dependency-path: |
            **/pyproject.toml
            **/requirements*.txt

      # Drops any llama_cpp_python entry from the pip cache before installing;
      # the install step below builds the package from source.
      - name: Remove llama-cpp-python from cache
        run: |
          pip cache remove llama_cpp_python

      - name: Cache huggingface
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9  # v4.0.2
        with:
          path: ~/.cache/huggingface
          # config contains DEFAULT_MODEL
          # hashFiles() resolves paths relative to the workspace root, and
          # instructlab is checked out under ./instructlab (see the checkout
          # step above), so the path carries that prefix. Without it the
          # pattern presumably matches no file and the key never changes.
          key: huggingface-${{ hashFiles('instructlab/src/instructlab/configuration.py') }}

      - name: Install instructlab and instructlab-sdg
        run: |
          export PATH="/home/runner/.local/bin:/usr/local/cuda/bin:$PATH"
          python3 -m venv venv
          . venv/bin/activate
          cd instructlab
          # Strip bracketed segments (e.g. extras) from requirements.txt so the
          # result can be passed to pip as a constraints file with -c.
          sed 's/\[.*\]//' requirements.txt > constraints.txt
          python3 -m pip cache remove llama_cpp_python
          # --no-binary forces a source build so CMAKE_ARGS takes effect.
          CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --no-binary llama_cpp_python -c constraints.txt llama_cpp_python
          # needed for --4-bit-quant option to ilab train
          python3 -m pip install bitsandbytes
          # install instructlab
          python3 -m pip install .
          cd ..
          # Install instructlab-sdg
          python3 -m pip install .

      # Each `run` step starts a fresh shell, so the virtualenv created in the
      # install step has to be activated again here.
      - name: Run e2e test
        run: |
          . venv/bin/activate
          ./instructlab/scripts/basic-workflow-tests.sh -cm

      # `if: always()` makes this cleanup run even when an earlier step failed.
      - name: Remove llama-cpp-python from cache
        if: always()
        run: |
          pip cache remove llama_cpp_python