From aff595fa7b3a959e8fcd6ce5bb327646a4967c26 Mon Sep 17 00:00:00 2001
From: Sam Avery <sam@inflection.ai>
Date: Thu, 30 Jan 2025 17:17:27 +0000
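Subject: [PATCH] Remove 70B full DPO config until multi-node support is
 available

The config's header says it works best on 2+ nodes with 8 H100s, but its
documented launch commands are single-node (--nnodes 1 --nproc_per_node 2).
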
---
 recipes/configs/llama3_1/70B_full_dpo.yaml | 92 ----------------------
 1 file changed, 92 deletions(-)
 delete mode 100644 recipes/configs/llama3_1/70B_full_dpo.yaml

diff --git a/recipes/configs/llama3_1/70B_full_dpo.yaml b/recipes/configs/llama3_1/70B_full_dpo.yaml
deleted file mode 100644
index c728f29522..0000000000
--- a/recipes/configs/llama3_1/70B_full_dpo.yaml
+++ /dev/null
@@ -1,92 +0,0 @@
-# Config for multi-device full DPO alignment in full_dpo_distributed.py
-# using a Llama3.1 70B model
-#
-# This config assumes that you've run the following command before launching
-# this run:
-#   tune download meta-llama/Meta-Llama-3.1-70B-Instruct --output-dir /tmp/Meta-Llama-3.1-70B-Instruct --ignore-patterns "original/consolidated.00.pth"
-#
-# To launch on 2 devices, run the following command from root:
-#   tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed --config llama3_1/70B_full_dpo
-#
-# You can add specific overrides through the command line. For example
-# to override the checkpointer directory while launching training
-# you can run:
-#   tune run --nnodes 1 --nproc_per_node 2 full_dpo_distributed --config llama3_1/70B_full_dpo checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
-#
-# This config works best when the model is being fine-tuned on 2+ nodes with 8 H100s.
-
-output_dir: /tmp/torchtune/llama3_1_70B/full_dpo # /tmp may be deleted by your system. Change it to your preference.
-
-# Model Arguments
-model:
-  _component_: torchtune.models.llama3_1.llama3_1_70b
-
-# Tokenizer
-tokenizer:
-  _component_: torchtune.models.llama3.llama3_tokenizer
-  path: /tmp/Meta-Llama-3.1-70B-Instruct/original/tokenizer.model
-  max_seq_len: 1024 # higher increases memory
-
-checkpointer:
-  _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
-  checkpoint_files:
-    filename_format: model-{}-of-{}.safetensors
-    max_filename: "00030"
-  recipe_checkpoint: null
-  output_dir: ${output_dir}
-  model_type: LLAMA3
-resume_from_checkpoint: False
-
-ref_checkpointer:
-  _component_: torchtune.training.FullModelHFCheckpointer
-  checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
-  checkpoint_files:
-    filename_format: model-{}-of-{}.safetensors
-    max_filename: "00030"
-  recipe_checkpoint: null
-  output_dir: ${output_dir}
-  model_type: LLAMA3
-
-# Dataset and Sampler
-dataset:
-  _component_: torchtune.datasets.stack_exchange_paired_dataset
-seed: null
-shuffle: True
-batch_size: 4
-
-# Optimizer and Scheduler
-optimizer:
-  _component_: torch.optim.AdamW
-  fused: True
-  weight_decay: 0.05
-  lr: 1e-6
-lr_scheduler:
-  _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
-  num_warmup_steps: 100
-
-loss:
-  _component_: torchtune.rlhf.loss.DPOLoss
-  beta: 0.05
-  label_smoothing: 0
-
-# Training
-epochs: 1
-max_steps_per_epoch: 1000
-gradient_accumulation_steps: 8  # Use to increase effective batch size
-compile: False  # torch.compile the model + loss, True increases speed + decreases memory
-
-# Logging
-metric_logger:
-  _component_: torchtune.training.metric_logging.DiskLogger
-  log_dir: ${output_dir}/logs
-log_every_n_steps: 1
-log_peak_memory_stats: True
-
-# Environment
-device: cuda
-dtype: bf16
-
-# Memory management
-enable_activation_checkpointing: True  # True reduces memory
-enable_activation_offloading: False  # True reduces memory