From aff595fa7b3a959e8fcd6ce5bb327646a4967c26 Mon Sep 17 00:00:00 2001
From: Sam Avery
Date: Thu, 30 Jan 2025 17:17:27 +0000
Subject: [PATCH] removing 70B full dpo config until multi-node support is available

---
Reviewer note (this text sits in the patch notes area, not in the commit
message): this copy was rebuilt from a whitespace-collapsed paste. Line breaks
follow the 92-line hunk header below. YAML nesting depth and intra-line spacing
are reconstructed -- TODO confirm against the tree before applying. The
<YOUR_CHECKPOINT_DIR> placeholder in the override example and the author
e-mail in "From:" appear to have been stripped as markup; the placeholder is
restored on the assumption that it matches the deleted file, the e-mail is
still missing.

 recipes/configs/llama3_1/70B_full_dpo.yaml | 92 ----------------------
 1 file changed, 92 deletions(-)
 delete mode 100644 recipes/configs/llama3_1/70B_full_dpo.yaml

diff --git a/recipes/configs/llama3_1/70B_full_dpo.yaml b/recipes/configs/llama3_1/70B_full_dpo.yaml
deleted file mode 100644
index c728f29522..0000000000
--- a/recipes/configs/llama3_1/70B_full_dpo.yaml
+++ /dev/null
@@ -1,92 +0,0 @@
-# Config for multi-device full DPO alignment in full_dpo_distributed.py
-# using a Llama3.1 70B model
-#
-# This config assumes that you've run the following command before launching
-# this run:
-# tune download meta-llama/Meta-Llama-3.1-70B-Instruct --output-dir /tmp/Meta-Llama-3.1-70B-Instruct --ignore-patterns "original/consolidated.00.pth"
-#
-# To launch on 2 devices, run the following command from root:
-# tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed --config llama3_1/70B_full_dpo
-#
-# You can add specific overrides through the command line. For example
-# to override the checkpointer directory while launching training
-# you can run:
-# tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed --config llama3_1/70B_full_dpo checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
-#
-# This config works best when the model is being fine-tuned on 2+ nodes with 8 H100s.
-
-output_dir: /tmp/torchtune/llama3_1_70B/full_dpo # /tmp may be deleted by your system. Change it to your preference.
-
-# Model Arguments
-model:
-  _component_: torchtune.models.llama3_1.llama3_1_70b
-
-# Tokenizer
-tokenizer:
-  _component_: torchtune.models.llama3.llama3_tokenizer
-  path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
-  max_seq_len: 1024 # higher increases memory
-
-checkpointer:
-  _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
-  checkpoint_files:
-    filename_format: model-{}-of-{}.safetensors
-    max_filename: "00030"
-  recipe_checkpoint: null
-  output_dir: ${output_dir}
-  model_type: LLAMA3
-resume_from_checkpoint: False
-
-ref_checkpointer:
-  _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
-  checkpoint_files:
-    filename_format: model-{}-of-{}.safetensors
-    max_filename: "00030"
-  recipe_checkpoint: null
-  output_dir: ${output_dir}
-  model_type: LLAMA3
-
-# Dataset and Sampler
-dataset:
-  _component_: torchtune.datasets.stack_exchange_paired_dataset
-seed: null
-shuffle: True
-batch_size: 4
-
-# Optimizer and Scheduler
-optimizer:
-  _component_: torch.optim.AdamW
-  fused: True
-  weight_decay: 0.05
-  lr: 1e-6
-lr_scheduler:
-  _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
-  num_warmup_steps: 100
-
-loss:
-  _component_: torchtune.rlhf.loss.DPOLoss
-  beta: 0.05
-  label_smoothing: 0
-
-# Training
-epochs: 1
-max_steps_per_epoch: 1000
-gradient_accumulation_steps: 8 # Use to increase effective batch size
-compile: False # torch.compile the model + loss, True increases speed + decreases memory
-
-# Logging
-metric_logger:
-  _component_: torchtune.training.metric_logging.DiskLogger
-  log_dir: ${output_dir}/logs
-log_every_n_steps: 1
-log_peak_memory_stats: True
-
-# Environment
-device: cuda
-dtype: bf16
-
-# Memory management
-enable_activation_checkpointing: True # True reduces memory
-enable_activation_offloading: False # True reduces memory