diff --git a/.github/workflows/push-image-olmo.yml b/.github/workflows/push-image-olmo.yml new file mode 100644 index 000000000..28a8c3467 --- /dev/null +++ b/.github/workflows/push-image-olmo.yml @@ -0,0 +1,81 @@ +# This is an example workflow file. +# +# When you add a new image, copy this file and then change all mentions of "hello-world" with +# the name of your new image. +# +# Read through the rest of the comments in this file to figure out how it works, and what else +# you need to change. +name: build_open_instruct_olmo + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +on: + push: + # Run this workflow anytime a push updates one of the files in the image's directory + # (other than the README), and anytime there's a new release tag for this image. + paths: + - 'open_instruct/**' + - '!open_instruct/README.md' + - 'requirements-olmo.txt' + - 'Dockerfile.olmo' + - '.github/workflows/push-image-olmo.yml' + # Note, add .olmo dockerfile + requirements if adding auto build to those + branches: [main] + # pull_request: # note, comment this out for running on every push + # # Also run on PRs that update the files in the image's directory (other than README). + # branches: [main] + # paths: + # - 'open_instruct/**' + # - '!open_instruct/README.md' + # - 'requirements-olmo.txt' + # - 'Dockerfile.olmo' + workflow_dispatch: # This allows us to manually trigger a build through the GitHub UI. + +env: + DOCKER_BUILDKIT: "1" + +jobs: + build: + name: open_instruct + runs-on: ubuntu-latest + timeout-minutes: 60 + if: (github.event_name != 'workflow_run') || (github.event.workflow_run.conclusion == 'success') + steps: + - uses: actions/checkout@v3 + with: + repository: allenai/oe-eval-internal + path: './oe-eval-internal' + ssh-key: ${{ secrets.OE_EVAL_GIT_CLONE_ACCESS_PRIVATE_SSH_DEPLOY_KEY }} + + - name: Setup environment + uses: ./.github/actions/setup + with: + beaker_token: ${{ secrets.BEAKER_TOKEN }} + # ghcr_token: ${{ secrets.GHCR_TOKEN }} + # ghcr_user: ${{ secrets.GHCR_USER }} + + # big images fail, trying this + - name: Delete huge unnecessary tools folder + run: rm -rf /opt/hostedtoolcache /usr/share/dotnet "$AGENT_TOOLSDIRECTORY" + + - name: Build image + run: | + docker build \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + --build-arg CUDA=12.1.0 --build-arg \ + TARGET=cudnn8-devel --build-arg DIST=ubuntu20.04 \ + -f Dockerfile.olmo . \ + -t open_instruct_olmo + + - name: Check image + run: | + docker run --rm open_instruct_olmo + - name: Push image + # if: github.event_name != 'pull_request' + uses: ./.github/actions/push + with: + image: open_instruct_olmo # this is the tag of the image we just built in the previous step + beaker: open_instruct_olmo_auto # this is the name of the image on Beaker + latest: true # this flag says we should also push this as the 'latest' version to GHCR diff --git a/.github/workflows/push-image.yml b/.github/workflows/push-image.yml index 40e205fb3..f5a35fdc0 100644 --- a/.github/workflows/push-image.yml +++ b/.github/workflows/push-image.yml @@ -44,8 +44,6 @@ jobs: timeout-minutes: 60 if: (github.event_name != 'workflow_run') || (github.event.workflow_run.conclusion == 'success') steps: - - uses: actions/checkout@v3 - - uses: actions/checkout@v3 with: repository: allenai/oe-eval-internal @@ -69,7 +67,6 @@ jobs: --build-arg BUILDKIT_INLINE_CACHE=1 \ --build-arg CUDA=12.1.0 --build-arg \ TARGET=cudnn8-devel --build-arg DIST=ubuntu20.04 \ - --build-arg REQUIRE=requirements.txt . 
\ -t open_instruct diff --git a/Dockerfile.olmo b/Dockerfile.olmo new file mode 100644 index 000000000..40ef4377a --- /dev/null +++ b/Dockerfile.olmo @@ -0,0 +1,121 @@ +ARG CUDA +ARG DIST +ARG TARGET +FROM --platform=linux/amd64 nvidia/cuda:${CUDA}-${TARGET}-${DIST} + +ARG DEBIAN_FRONTEND="noninteractive" +ENV TZ="America/Los_Angeles" + +# Install base tools. +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + jq \ + language-pack-en \ + make \ + sudo \ + unzip \ + vim \ + wget \ + parallel \ + iputils-ping \ + tmux + +ARG BEAKER_VERSION +RUN curl --silent \ + --connect-timeout 5 \ + --max-time 10 \ + --retry 5 \ + --retry-delay 0 \ + --retry-max-time 40 \ + --output beaker.tar.gz \ + "https://beaker.org/api/v3/release/cli?os=linux&arch=amd64&version=${BEAKER_VERSION}" \ + && tar -zxf beaker.tar.gz -C /usr/local/bin/ ./beaker \ + && rm beaker.tar.gz + +# This ensures the dynamic linker (or NVIDIA's container runtime, I'm not sure) +# puts the right NVIDIA things in the right place (that THOR requires). +ENV NVIDIA_DRIVER_CAPABILITIES=graphics,utility,compute + +# Install conda. We give anyone in the users group the ability to run +# conda commands and install packages in the base (default) environment. +# Things installed into the default environment won't persist, but we prefer +# convenience in this case and try to make sure the user is aware of this +# with a message that's printed when the session starts. +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.1.0-1-Linux-x86_64.sh \ + && echo "32d73e1bc33fda089d7cd9ef4c1be542616bd8e437d1f77afeeaf7afdb019787 Miniconda3-py310_23.1.0-1-Linux-x86_64.sh" \ + | sha256sum --check \ + && bash Miniconda3-py310_23.1.0-1-Linux-x86_64.sh -b -p /opt/miniconda3 \ + && rm Miniconda3-py310_23.1.0-1-Linux-x86_64.sh + +ENV PATH=/opt/miniconda3/bin:/opt/miniconda3/condabin:$PATH +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib:/usr/local/cuda/lib64:$LD_LIBRARY_PATH + +# Install a few additional utilities via pip +RUN /opt/miniconda3/bin/pip install --no-cache-dir \ + gpustat \ + jupyter \ + beaker-gantry \ + oocmap + +# Ensure users can modify their container environment. +RUN echo '%users ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +# Make the base image friendlier for interactive workloads. This makes things like the man command +# work. +RUN yes | unminimize + +# Install MLNX OFED user-space drivers +# See https://docs.nvidia.com/networking/pages/releaseview.action?pageId=15049785#Howto:DeployRDMAacceleratedDockercontaineroverInfiniBandfabric.-Dockerfile +ENV MOFED_VER 5.8-1.1.2.1 +ENV OS_VER ubuntu20.04 +ENV PLATFORM x86_64 +RUN wget --quiet https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VER}/MLNX_OFED_LINUX-${MOFED_VER}-${OS_VER}-${PLATFORM}.tgz && \ + tar -xvf MLNX_OFED_LINUX-${MOFED_VER}-${OS_VER}-${PLATFORM}.tgz && \ + MLNX_OFED_LINUX-${MOFED_VER}-${OS_VER}-${PLATFORM}/mlnxofedinstall --basic --user-space-only --without-fw-update -q && \ + rm -rf MLNX_OFED_LINUX-${MOFED_VER}-${OS_VER}-${PLATFORM} && \ + rm MLNX_OFED_LINUX-${MOFED_VER}-${OS_VER}-${PLATFORM}.tgz + +# The -l flag makes bash act as a login shell and load /etc/profile, etc. +ENTRYPOINT ["bash", "-l"] + +WORKDIR /stage/ + +# TODO When updating flash-attn or torch in the future, make sure to update the version in the requirements.txt file. +ENV HF_HUB_ENABLE_HF_TRANSFER=1 +COPY requirements-olmo.txt . 
+RUN pip install --upgrade pip "setuptools<70.0.0" wheel +# TODO, unpin setuptools when this issue in flash attention is resolved +RUN pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu121 +RUN pip install packaging +RUN pip install flash-attn==2.5.9.post1 --no-build-isolation +# for newest olmo's, move to requirements when ai2-olmo supports torch 2.4 +# core is a dependency of ai2-olmo +RUN pip install ai2-olmo-core==0.1.0 omegaconf +# RUN pip install ai2-olmo>=0.5.0 --no-deps +# TODO Update Once this is merged https://github.com/allenai/OLMo/pull/719, then next release +RUN pip install git+https://github.com/allenai/OLMo.git@47f8f5abb40eb100c6623be12e1648c841b2ab99 --no-deps +RUN pip install -r requirements-olmo.txt + +RUN pip install git+https://github.com/AkshitaB/vllm.git +RUN pip install vllm-flash-attn + + +# NLTK download +RUN python -m nltk.downloader punkt +COPY open_instruct open_instruct +COPY oe-eval-internal oe-eval-internal + +# install the package in editable mode +COPY pyproject.toml . +RUN pip install -e . +COPY .git/ ./.git/ +COPY eval eval +COPY configs configs +COPY scripts scripts +COPY mason.py mason.py +RUN chmod +x scripts/* + +# for interactive session +RUN chmod -R 777 /stage/ diff --git a/README.md b/README.md index 1e6fe9409..2eaef5b9b 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ pip install -r weight-diff-requirements.txt For a second installation strategy, if you'd like to *run experiments within a Docker environment*, you can create one using: ```bash -docker build --build-arg CUDA=12.1.0 --build-arg TARGET=cudnn8-devel --build-arg DIST=ubuntu20.04 --build-arg REQUIRE=requirements.txt . -t open_instruct +docker build --build-arg CUDA=12.1.0 --build-arg TARGET=cudnn8-devel --build-arg DIST=ubuntu20.04 . -t open_instruct # if you are interally at AI2, you can create an image like this: beaker image create open_instruct -n open_instruct -w ai2/$(whoami) diff --git a/configs/beaker_configs/default_finetune_offloading.yaml b/configs/beaker_configs/default_finetune_offloading.yaml new file mode 100644 index 000000000..4722b4e13 --- /dev/null +++ b/configs/beaker_configs/default_finetune_offloading.yaml @@ -0,0 +1,69 @@ +version: v2 +description: open-instruct-finetune +budget: ai2/oe-adapt +tasks: + - name: open-instruct-finetune + image: + beaker: nathanl/open_instruct_auto + command: [ + '/bin/sh', '-c' + ] + arguments: ['PYTHONPATH="/stage:$PYTHONPATH" accelerate launch + --mixed_precision bf16 + --num_machines 1 + --num_processes 4 + --use_deepspeed + --deepspeed_config_file configs/ds_configs/stage3_offloading_accelerate.conf + open_instruct/finetune.py + --model_name_or_path /hf_llama_models + --use_flash_attn + --tokenizer_name /hf_llama_models + --max_seq_length 2048 + --preprocessing_num_workers 16 + --per_device_train_batch_size 2 + --gradient_accumulation_steps 16 + --learning_rate 2e-5 + --lr_scheduler_type linear + --warmup_ratio 0.03 + --weight_decay 0. 
+ --num_train_epochs 2 + --output_dir /output/ + --with_tracking + --report_to tensorboard + --logging_steps 1 + '] + envVars: + - name: CUDA_DEVICE_ORDER + value: PCI_BUS_ID + - name: TRANSFORMERS_CACHE + value: ./cache/ + - name: WANDB_API_KEY + secret: WANDB_API_KEY + - name: WANDB_PROJECT + value: open-instruct + - name: WANDB_WATCH + value: false + - name: WANDB_LOG_MODEL + value: false + - name: WANDB_DISABLED + value: true + - name: HF_TOKEN + secret: HF_TOKEN + # datasets: # example for how to include datasets in mounting + # - mountPath: /data + # source: + # beaker: Yizhongw03/processed_open_instruct_data + # - mountPath: /mmlu + # source: + # beaker: Yizhongw03/mmlu + # - mountPath: /hf_llama_models + # source: + # beaker: Yizhongw03/hf_llama_model_7B + result: + path: /output + resources: + gpuCount: 4 + context: + cluster: ai2/allennlp-cirrascale + priority: high + preemptible: false \ No newline at end of file diff --git a/configs/train_configs/dpo/olmo_7b_0924.yaml b/configs/train_configs/dpo/olmo_7b_0924.yaml new file mode 100644 index 000000000..3028bfca8 --- /dev/null +++ b/configs/train_configs/dpo/olmo_7b_0924.yaml @@ -0,0 +1,29 @@ +model_name_or_path: /model +model_revision: main +use_flash_attn: true +gradient_checkpointing: true +dataset_mixer: + allenai/ultrafeedback_binarized_cleaned_train: 1.0 + ai2-adapt-dev/DaringAnteater-prefs-RM-filter: 1.0 + ai2-adapt-dev/WildChat-prefs-280824: 1.0 + allenai/tulu-3-hardcoded-preferences: 1.0 +tokenizer_name: /model +use_slow_tokenizer: true +max_seq_length: 2048 +preprocessing_num_workers: 16 +per_device_train_batch_size: 1 +gradient_accumulation_steps: 16 # designed for 8 GPUs, so batch size 128 +learning_rate: 5.0e-7 +lr_scheduler_type: linear +warmup_ratio: 0.1 +weight_decay: 0.0 +num_train_epochs: 1 +output_dir: /output +with_tracking: true +report_to: + - wandb +logging_steps: 1 +use_lora: false +dpo_loss_type: dpo_norm +dpo_beta: 5 +checkpointing_steps: 1000 \ No newline at end of file diff --git a/configs/train_configs/sft/olmo_7b_0924.yaml b/configs/train_configs/sft/olmo_7b_0924.yaml new file mode 100644 index 000000000..e8264bd0a --- /dev/null +++ b/configs/train_configs/sft/olmo_7b_0924.yaml @@ -0,0 +1,22 @@ +model_name_or_path: ai2-adapt-dev/OLMo-medium-peteish7-anneal-from-928646-50B-nowup-dclm07-flan +model_revision: main +use_flash_attn: true +tokenizer_name: ai2-adapt-dev/OLMo-medium-peteish7-anneal-from-928646-50B-nowup-dclm07-flan +use_slow_tokenizer: false # olmo models only use fast tokenizers +dataset_name: allenai/llama-3-tulu-v3.3-mix-preview +max_seq_length: 4096 +preprocessing_num_workers: 128 +per_device_train_batch_size: 1 +gradient_accumulation_steps: 8 # should run with this set to 16 for 1 node only +learning_rate: 2.0e-06 +lr_scheduler_type: linear +warmup_ratio: 0.03 +weight_decay: 0.0 +num_train_epochs: 3 +output_dir: /output/ +with_tracking: true +report_to: + - wandb +logging_steps: 1 +checkpointing_steps: epoch +add_bos: true \ No newline at end of file diff --git a/configs/train_configs/sft/olmo_7b_0924_fw2_permissive.yaml b/configs/train_configs/sft/olmo_7b_0924_fw2_permissive.yaml new file mode 100644 index 000000000..4539f713a --- /dev/null +++ b/configs/train_configs/sft/olmo_7b_0924_fw2_permissive.yaml @@ -0,0 +1,36 @@ +# model_name_or_path: ai2-adapt-dev/OLMo-medium-peteish7-anneal-from-928646-50B-nowup-dclm07-flan +model_name_or_path: /adapt-data/ai2-llm/checkpoints/OLMo-medium/peteish7-anneal-from-928646-50B-nowup-dclm07-fw2/step11931-hf +model_revision: main +use_flash_attn: true 
+# tokenizer_name: ai2-adapt-dev/OLMo-medium-peteish7-anneal-from-928646-50B-nowup-dclm07-flan +tokenizer_name: /adapt-data/ai2-llm/checkpoints/OLMo-medium/peteish7-anneal-from-928646-50B-nowup-dclm07-fw2/step11931-hf +use_slow_tokenizer: false # olmo models only use fast tokenizers +dataset_mixer: + ai2-adapt-dev/metamath-qa-reformat: 1.0 # MIT License + natolambert/tulu-v2-sft-mixture-flan: 1.0 # FLAN Apache 2.0 + natolambert/tulu-v2-sft-mixture-cot: 1.0 # FLAN Apache 2.0 + allenai/openassistant-guanaco-reformatted: 1.0 # Apache 2.0 + ai2-adapt-dev/codefeedback-single-turn-reformat-magicoder: 1.0 # MIT MagiCoder section of CodeFeedback + ai2-adapt-dev/aya_dataset-reformat: 1.0 # Apache 2.0 + ai2-adapt-dev/SlimOrca-reformat: 0.25 # MIT License + ai2-adapt-dev/Daring-Anteater-reformat: 1.0 # CC BY 4.0 + ai2-adapt-dev/WebInstructSub-reformat-apache: 0.1 # Apache 2.0 + ai2-adapt-dev/Table-GPT-All-train: 0.5 # MIT +max_seq_length: 4096 +preprocessing_num_workers: 128 +per_device_train_batch_size: 1 +gradient_accumulation_steps: 4 # designed for 4 nodes +# gradient_accumulation_steps: 16 # designed for 1 nodes +gradient_checkpointing: true +learning_rate: 2.0e-06 +lr_scheduler_type: linear +warmup_ratio: 0.03 +weight_decay: 0.0 +num_train_epochs: 3 +output_dir: /output/ +with_tracking: true +report_to: + - wandb +logging_steps: 1 +checkpointing_steps: epoch +add_bos: true \ No newline at end of file diff --git a/configs/train_configs/sft/olmo_7b_0924_fw2_tulu_v3.4.yaml b/configs/train_configs/sft/olmo_7b_0924_fw2_tulu_v3.4.yaml new file mode 100644 index 000000000..491fc4502 --- /dev/null +++ b/configs/train_configs/sft/olmo_7b_0924_fw2_tulu_v3.4.yaml @@ -0,0 +1,28 @@ +# model_name_or_path: ai2-adapt-dev/OLMo-medium-peteish7-anneal-from-928646-50B-nowup-dclm07-flan +model_name_or_path: /adapt-data/ai2-llm/checkpoints/OLMo-medium/peteish7-anneal-from-928646-50B-nowup-dclm07-fw2/step11931-hf +model_revision: main +use_flash_attn: true +# tokenizer_name: ai2-adapt-dev/OLMo-medium-peteish7-anneal-from-928646-50B-nowup-dclm07-flan +tokenizer_name: /adapt-data/ai2-llm/checkpoints/OLMo-medium/peteish7-anneal-from-928646-50B-nowup-dclm07-fw2/step11931-hf +use_slow_tokenizer: false # olmo models only use fast tokenizers +dataset_name: allenai/tulu-v3.4-mix-preview +max_seq_length: 4096 +preprocessing_num_workers: 128 +per_device_train_batch_size: 1 +# gradient_accumulation_steps: 4 # designed for 4 nodes +gradient_accumulation_steps: 8 # designed for 2 nodes +# gradient_accumulation_steps: 16 # designed for 1 nodes +gradient_checkpointing: true +learning_rate: 2.0e-06 +lr_scheduler_type: linear +warmup_ratio: 0.03 +weight_decay: 0.0 +num_train_epochs: 3 +output_dir: /output/ +with_tracking: true +reduce_loss: mean +report_to: + - wandb +logging_steps: 1 +checkpointing_steps: epoch +add_bos: true \ No newline at end of file diff --git a/open_instruct/dpo_tune.py b/open_instruct/dpo_tune.py index cbd95c2a3..36b643f4c 100644 --- a/open_instruct/dpo_tune.py +++ b/open_instruct/dpo_tune.py @@ -67,6 +67,7 @@ from open_instruct.model_utils import push_folder_to_hub, save_with_accelerate from open_instruct.utils import ( ArgumentParserPlus, + check_hf_olmo_availability, clean_last_n_checkpoints, get_datasets, get_last_checkpoint_path, @@ -469,6 +470,12 @@ def prepare_deepspeed(accelerator, model): def main(args: FlatArguments): + # try to import OLMo for automodel + if check_hf_olmo_availability(): + # allows AutoModel... 
to work with not in transformers olmo models + import hf_olmo # noqa + from hf_olmo import OLMoTokenizerFast + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment @@ -673,7 +680,9 @@ def load_model(): 0, 1, ], "LlamaTokenizer should only add one special token - the pad_token, or no tokens if pad token present." - elif isinstance(tokenizer, GPTNeoXTokenizerFast): + elif isinstance(tokenizer, GPTNeoXTokenizerFast) or ( + check_hf_olmo_availability() and isinstance(tokenizer, OLMoTokenizerFast) + ): # OLMo newer models use this tokenizer if tokenizer.bos_token is None: tokenizer.bos_token = tokenizer.eos_token diff --git a/open_instruct/finetune.py b/open_instruct/finetune.py index e7cf30916..edb6da6b9 100644 --- a/open_instruct/finetune.py +++ b/open_instruct/finetune.py @@ -56,6 +56,7 @@ from open_instruct.model_utils import push_folder_to_hub, save_with_accelerate from open_instruct.utils import ( ArgumentParserPlus, + check_hf_olmo_availability, clean_last_n_checkpoints, get_datasets, get_last_checkpoint_path, @@ -453,6 +454,12 @@ def encode_sft_example(example, tokenizer, max_seq_length): def main(args: FlatArguments): + # try to import OLMo for automodel + if check_hf_olmo_availability(): + # allows AutoModel... to work with not in transformers olmo models + import hf_olmo # noqa + from hf_olmo import OLMoTokenizerFast + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment @@ -646,7 +653,9 @@ def main(args: FlatArguments): 0, 1, ], "LlamaTokenizer should only add one special token - the pad_token, or no tokens if pad token present." - elif isinstance(tokenizer, GPTNeoXTokenizerFast): + elif isinstance(tokenizer, GPTNeoXTokenizerFast) or ( + check_hf_olmo_availability() and isinstance(tokenizer, OLMoTokenizerFast) + ): # OLMo newer models use this tokenizer if tokenizer.bos_token is None: tokenizer.bos_token = tokenizer.eos_token diff --git a/open_instruct/mix_data.py b/open_instruct/mix_data.py index 05b8113cc..ea3490b47 100644 --- a/open_instruct/mix_data.py +++ b/open_instruct/mix_data.py @@ -14,9 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from open_instruct.finetune import FlatArguments # script for mixing and saving data +from open_instruct.finetune import FlatArguments from open_instruct.utils import ArgumentParserPlus, get_datasets # Run as module for local imports, e.g.: diff --git a/open_instruct/utils.py b/open_instruct/utils.py index 120b9d087..b560478e4 100644 --- a/open_instruct/utils.py +++ b/open_instruct/utils.py @@ -14,6 +14,7 @@ import dataclasses import functools +import importlib import json import logging import os @@ -53,6 +54,40 @@ """ +# ---------------------------------------------------------------------------- +# Import utilities +def check_hf_olmo_availability(return_version: bool = False) -> Union[dict, bool]: + pkg_name = "hf_olmo" + + # Check if the package spec exists + package_exists = importlib.util.find_spec(pkg_name) is not None + package_version = "N/A" + + if package_exists: + try: + # Primary method to get the package version + package_version = importlib.metadata.version(pkg_name) + except importlib.metadata.PackageNotFoundError: + # Fallback method + try: + package = importlib.import_module(pkg_name) + package_version = getattr(package, "__version__", "N/A") + except ImportError: + package_exists = False + package_version = "N/A" + + if return_version: + return { + "available": package_exists, + "version": package_version, + "python_version": sys.version, + "os": os.name, + "platform": sys.platform, + } + else: + return package_exists + + # ---------------------------------------------------------------------------- # Dataset utilities def is_openai_format(messages: Any) -> bool: diff --git a/requirements-olmo.txt b/requirements-olmo.txt new file mode 100644 index 000000000..1ec51fb2f --- /dev/null +++ b/requirements-olmo.txt @@ -0,0 +1,46 @@ +# TODO When updating flash-attn or torch in the future, make sure to update the version in the Dockerfile +torch==2.4.0 +scipy +packaging +sentencepiece +datasets +deepspeed==0.14.4 +accelerate==0.31.0 +peft>=0.11.1 +bitsandbytes>=0.41.1 +evaluate>=0.4.0 +tokenizers==0.19.1 +protobuf +transformers==4.43.4 +openai>=1.0.0 +tiktoken +rouge_score +tensorboard +wandb +gradio>=3.50.2 +termcolor +jsonlines +unidic-lite +einops +flash-attn==2.5.9.post1 # should really only be in dockerfile. 
Local env often doesn't have GPUs +fire +alpaca-eval==0.6.2 +# for human eval web app +flask +openpyxl +# for ifeval +nltk==3.8.1 +langdetect +immutabledict +# for math evaluations +antlr4-python3-runtime==4.9.2 +mpmath==1.3.0 +sympy==1.12.0 +# for linting +black +flake8 +isort +autoflake +pytest +hf_transfer +beaker-py \ No newline at end of file diff --git a/scripts/eval/oe-eval.sh b/scripts/eval/oe-eval.sh index ed605b685..c30ce38cf 100755 --- a/scripts/eval/oe-eval.sh +++ b/scripts/eval/oe-eval.sh @@ -97,7 +97,10 @@ TASKS=( "alpaca_eval_v2::tulu" "truthfulqa::tulu" ) -MODEL_TYPE="--model-type vllm" +# For models without VLLM (experimental architectures) +# comment out the VLLM arg and set GPU_COUNT_OTHER to 1 +# also consider lowering the batch size (VLLM arg), maybe to 5, VLLM handles it differently +# MODEL_TYPE="--model-type vllm" BATCH_SIZE_VLLM=10000 BATCH_SIZE_OTHER=1 # Set GPU_COUNT and GPU_COUNT_OTHER based on NUM_GPUS diff --git a/scripts/submit_finetune_job.py b/scripts/submit_finetune_job.py index 7b7e7609f..617200b1c 100644 --- a/scripts/submit_finetune_job.py +++ b/scripts/submit_finetune_job.py @@ -25,6 +25,8 @@ def main(): parser.add_argument("--num_nodes", type=int, default=1, help="Number of nodes to use") parser.add_argument("--image", type=str, default="nathanl/open_instruct_auto", help="Beaker image to use.") parser.add_argument("--workspace", type=str, default="ai2/tulu-2-improvements", help="Beaker workspace to use.") + parser.add_argument("--mount_on_weka", type=str, default=None, help="Mount a Weka directory to the job") + parser.add_argument("--weka_mount_path", type=str, default="/adapt-data", help="Path to mount the Weka directory") # allow unknown args from CLI, use this to modify loaded config in bash scripts for sweeping # Note, can only override args in --config passed (not default FlatArguments class in open_instruct/utils.py) @@ -166,7 +168,7 @@ def parse_args(args): d['tasks'][0]['arguments'][0] = new_arguments # name and description - exp_name = f"open_instruct_finetune_{model_name}_{now}" + exp_name = f"open_instruct_finetune_{model_name}_{now}"[:128] d['description'] = exp_name d['tasks'][0]['name'] = exp_name @@ -221,6 +223,14 @@ def parse_args(args): d['tasks'][0]['envVars'].append({ 'name': 'WANDB_API_KEY', 'secret': f"{beaker_whoami}_WANDB_API_KEY" }) + + # Weka setting + if args.mount_on_weka: + if d['tasks'][0].get('datasets') is None: + d['tasks'][0]['datasets'] = [] + d['tasks'][0]['datasets'].append({ + 'mountPath': f"{args.weka_mount_path}", 'source': {'weka': f"{args.mount_on_weka}"} + }) # optionally, print to debug config print(d)