From 84300ecba27a0133fe0e38b1218e67fbe6a5f187 Mon Sep 17 00:00:00 2001
From: Valentina Pyatkin
Date: Tue, 5 Nov 2024 16:37:57 -0800
Subject: [PATCH] Add files via upload

---
 .../train_configs/dpo/best_current_mix.yaml | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 configs/train_configs/dpo/best_current_mix.yaml

diff --git a/configs/train_configs/dpo/best_current_mix.yaml b/configs/train_configs/dpo/best_current_mix.yaml
new file mode 100644
index 000000000..1983dcde0
--- /dev/null
+++ b/configs/train_configs/dpo/best_current_mix.yaml
@@ -0,0 +1,41 @@
+model_name_or_path: /model
+model_revision: main
+use_flash_attn: true
+gradient_checkpointing: true
+dataset_mixer:
+
+  # LJ's HelpSteer regenerations
+  # (original HelpSteer2)
+  ai2-adapt-dev/helpsteer2-uf-pipeline-regen: 1.0
+
+  # LJ's UltraFeedback (UF) regenerations
+  ai2-adapt-dev/tulu3.4-sft-replica-50k-gpt4-prefs-on-policy: 1.0
+
+  # LJ's regenerations of my datasets
+  ai2-adapt-dev/DaringAnteater-prefs-RM-filter-uf-pipeline-regen: 1.0
+  ai2-adapt-dev/WildChat-prefs-280824-uf-pipeline-regen: 1.0
+  ai2-adapt-dev/Llama-3.1-if_taxonomy_tulu-uf-regen: 1.0
+
+  # LJ's unused WildChat regenerations
+  ai2-adapt-dev/wildchat_v3.9_unused_off_policy: 1.0
+
+tokenizer_name: /model
+use_slow_tokenizer: true
+max_seq_length: 2048
+preprocessing_num_workers: 16
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 16 # 1 per device x 16 steps x 8 GPUs = effective batch size 128
+learning_rate: 5.0e-7
+lr_scheduler_type: linear
+warmup_ratio: 0.1
+weight_decay: 0.0
+num_train_epochs: 1
+output_dir: /output
+with_tracking: true
+report_to:
+  - wandb
+logging_steps: 1
+use_lora: false
+dpo_loss_type: dpo_norm
+dpo_beta: 5
+checkpointing_steps: 1000
\ No newline at end of file
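
The `dataset_mixer` block combines six ai2-adapt-dev preference datasets, each at weight 1.0. As a rough sketch of the mixing semantics (assuming a weight of 1.0 keeps a dataset whole and a fractional weight would subsample it; the `mix_preference_datasets` helper below is hypothetical, not the repository's actual loader):

```python
# Hypothetical illustration of dataset_mixer semantics: weight 1.0 keeps a
# dataset whole, a fractional weight keeps that fraction of shuffled examples.
from datasets import Dataset, concatenate_datasets, load_dataset

def mix_preference_datasets(dataset_weights: dict, seed: int = 42) -> Dataset:
    parts = []
    for name, weight in dataset_weights.items():
        ds = load_dataset(name, split="train")
        # Shuffle, then keep the first `weight` fraction of examples.
        n = int(len(ds) * weight)
        parts.append(ds.shuffle(seed=seed).select(range(n)))
    return concatenate_datasets(parts)

# Usage with two of the datasets named in the config:
mixed = mix_preference_datasets({
    "ai2-adapt-dev/helpsteer2-uf-pipeline-regen": 1.0,
    "ai2-adapt-dev/wildchat_v3.9_unused_off_policy": 1.0,
})
```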
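`dpo_loss_type: dpo_norm` selects a length-normalized variant of the DPO objective, which is also why `dpo_beta: 5` sits far above the ~0.1 typical for standard DPO on summed log-probabilities. A minimal sketch, assuming each per-sequence log-probability arrives already averaged over its token count (the function name and tensor conventions are illustrative assumptions, not the repository's exact code):

```python
# Minimal sketch of a length-normalized DPO loss ("dpo_norm").
# Assumption: each *_logps tensor has shape (batch,) and holds per-sequence
# log-probabilities already averaged over token count (the normalization).
import torch
import torch.nn.functional as F

def dpo_norm_loss(policy_chosen_logps: torch.Tensor,
                  policy_rejected_logps: torch.Tensor,
                  ref_chosen_logps: torch.Tensor,
                  ref_rejected_logps: torch.Tensor,
                  beta: float = 5.0) -> torch.Tensor:
    # Margin of the policy's chosen-vs-rejected preference, measured
    # relative to the frozen reference model.
    pi_logratios = policy_chosen_logps - policy_rejected_logps
    ref_logratios = ref_chosen_logps - ref_rejected_logps
    logits = pi_logratios - ref_logratios
    # Standard Bradley-Terry objective; beta scales the margin. Because the
    # log-probs are length-normalized, a large beta such as 5 is plausible.
    return -F.logsigmoid(beta * logits).mean()
```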