Skip to content

Add statistical test for regex-guided generation #347

Add statistical test for regex-guided generation

Add statistical test for regex-guided generation #347

name: Benchmark PR
on:
push:
pull_request:
types: [synchronize, labeled]
workflow_dispatch:
env:
PYTHON_VERSION: "3.10"
WORKING_DIR: ${{ github.workspace }}/benchmarks
BENCHMARKS_OUTPUT: ${{ github.workspace }}/benchmarks_output
permissions:
contents: read
# Cancels all previous workflow runs for pull requests that have not completed.
concurrency:
# The concurrency group contains the workflow name and the branch name for pull requests
# or the commit hash for any other events.
group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }}
cancel-in-progress: true
jobs:
benchmark-pr:
runs-on: ubuntu-latest
if: ${{ contains(github.event.pull_request.labels.*.name, 'run-benchmarks') || github.ref == 'refs/heads/main' }}
defaults:
run:
working-directory: ${{ env.WORKING_DIR }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install asv virtualenv
- name: Run benchmarks
shell: bash -l {0}
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
TQDM_DISABLE: 1
OPENBLAS_NUM_THREADS: 1
MKL_NUM_THREADS: 1
OMP_NUM_THREADS: 1
ASV_FACTOR: 1.5
# Escape user controlled variables
BASELINE_LABEL: "${{ github.event.pull_request.base.label }}"
CONTENDER_LABEL: "${{ github.event.pull_request.head.label }}"
run: |
set -x
# ID this runner
asv machine --yes
echo "Baseline: ${{ github.event.pull_request.base.sha }} ($BASELINE_LABEL)"
echo "Contender: ${GITHUB_SHA} ($CONTENDER_LABEL)"
# Run benchmarks for current commit against base
ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR --no-only-changed"
asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
| sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \
| tee benchmarks.log
# Report and export results for subsequent steps
if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then
exit 1
fi