Test and Benchmark Models #24
name: Test and Benchmark Models

on:
  workflow_dispatch:
    inputs:
      model_id:
        description: 'Model ID on huggingface, for example: homebrewltd/llama3-s-2024-07-08'
        required: true
        default: homebrewltd/llama3-s-2024-07-08
        type: string
      dataset_id:
        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
        required: true
        default: jan-hq/instruction-speech-conversation-test
        type: string
      extra_args:
        description: 'Extra arguments for the Python command, for example: --mode audio --num_rows 5'
        required: false
        default: "--mode audio --num_rows 5"
        type: string
      run_benchmark:
        description: 'Run benchmark test'
        required: false
        default: true
        type: boolean
      run_audio_benchmark:
        description: 'Run audio benchmark test'
        required: false
        default: true
        type: boolean
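# A sketch of triggering this workflow manually with the GitHub CLI, filled in with the
# defaults above (run it from a clone of whichever repository hosts this workflow):
#   gh workflow run "Test and Benchmark Models" \
#     -f model_id=homebrewltd/llama3-s-2024-07-08 \
#     -f dataset_id=jan-hq/instruction-speech-conversation-test \
#     -f extra_args="--mode audio --num_rows 5" \
#     -f run_benchmark=true \
#     -f run_audio_benchmark=true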
jobs:
  run-test-and-benchmark:
    runs-on: research
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: 'recursive'
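      # Submodules are checked out recursively because the benchmark steps below run from
      # ./lm-evaluation-harness, which is assumed to be a git submodule of this repository.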
      - name: Install dependencies
        working-directory: ./tests
        run: |
          python3 -m pip install --upgrade pip
          pip3 install -r requirements.txt

      - name: Run tests
        working-directory: ./tests
        run: |
          python3 test_case.py --model_dir ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }} --data_dir ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }} ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}
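      # The || fallbacks above only take effect when the corresponding input is empty;
      # on a normal workflow_dispatch run the input defaults declared above are used.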
      - name: Install benchmark dependencies
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        run: |
          cd lm-evaluation-harness
          pip3 install -e .
          pip3 install lm_eval[vllm]
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Run benchmark
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        run: |
          cd lm-evaluation-harness
          chmod +x ./run_benchmark.sh
          ./run_benchmark.sh ${{ github.event.inputs.model_id }}

      - name: Upload benchmark results
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        uses: actions/upload-artifact@v2
        with:
          name: benchmark-results
          path: ./lm-evaluation-harness/benchmark_results/**/*.json
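# A sketch of pulling the uploaded benchmark results to a local machine after a run
# finishes (the run ID is hypothetical; list recent runs with `gh run list`):
#   gh run download <run-id> --name benchmark-results --dir ./benchmark_results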