Commit

Fix exp6 and exp8 scripts
so298 committed Feb 17, 2024
1 parent eb631b6 commit c8197d4
Showing 4 changed files with 28 additions and 27 deletions.
13 changes: 7 additions & 6 deletions sft-scripts/exp6_gpt4-self-inst_lora_all.sh
@@ -1,7 +1,7 @@
#!/bin/bash

-# Single GPU sft script
-# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_single_gpu.sh
+# Single Node sft script
+# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_multi_gpu.sh

set -eux

@@ -10,8 +10,6 @@ SCRIPT_PATH="llm-jp-sft/"
cd $SCRIPT_PATH
source venv/bin/activate

-# Set CUDA Device (default: 0)
-export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

# Target Model (TODO: Change if needed)
TARGET_MODEL="/model/13B_HF/llm-jp-13b-cc-v2-63500step.code10K_en20K_ja30K_ver2.2" # exp6
@@ -24,18 +22,21 @@ export WANDB_PROJECT="13b-sft-and-eval"
export WANDB_NAME="sft-exp6-${SFT_TYPE}"

# Training settings
+config_file="configs/accelerate_config_zero3.yaml"
dataset_path="./dataset"
dataset_sh="./mdx/dataset_gpt4_self_inst_ja.sh"
num_train_epochs=5
per_device_train_batch_size=1
-gradient_accumulation_steps=64 # global_batch_size = per_device_train_batch_size * gradient_accumulation_steps = 64
+gradient_accumulation_steps=8 # global_batch_size = per_device_train_batch_size * n_gpus * gradient_accumulation_steps = 64
peft_target_model="llama-all"
max_seq_length=4096 # for llama model
learning_rate="2e-5"
lr_scheduler_type="cosine"
warmup_ratio=0.1

-python train.py \
+# N_gpu = 8
+accelerate launch --config_file $config_file \
+train.py \
--model_name_or_path $TARGET_MODEL \
--tokenizer_name_or_path $TARGET_MODEL \
--use_peft \
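The updated comment encodes the batch-size arithmetic behind the change: 1 sample per device × 8 GPUs × 8 accumulation steps keeps the effective global batch size at 64, the same value the old single-GPU setting of 64 accumulation steps produced. A minimal sanity-check sketch (illustrative only, not part of the commit; the n_gpus value comes from the "# N_gpu = 8" comment in the script):

#!/bin/bash
# Check that the multi-GPU settings reproduce the old global batch size of 64.
per_device_train_batch_size=1
n_gpus=8                       # from the "# N_gpu = 8" comment
gradient_accumulation_steps=8  # new value in this commit (was 64 on a single GPU)
global_batch_size=$((per_device_train_batch_size * n_gpus * gradient_accumulation_steps))
echo "global_batch_size = ${global_batch_size}"  # prints 64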
14 changes: 7 additions & 7 deletions sft-scripts/exp6_gpt4-self-inst_lora_all_lr_1e-4.sh
@@ -1,7 +1,7 @@
#!/bin/bash

-# Single GPU sft script
-# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_single_gpu.sh
+# Single Node sft script
+# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_multi_gpu.sh

set -eux

@@ -10,9 +10,6 @@ SCRIPT_PATH="llm-jp-sft/"
cd $SCRIPT_PATH
source venv/bin/activate

-# Set CUDA Device (default: 0)
-export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

# Target Model (TODO: Change if needed)
TARGET_MODEL="/model/13B_HF/llm-jp-13b-cc-v2-63500step.code10K_en20K_ja30K_ver2.2" # exp6
SFT_TYPE="lora-all-gpt4-self-inst-lr_1e-4"
@@ -24,18 +21,21 @@ export WANDB_PROJECT="13b-sft-and-eval"
export WANDB_NAME="sft-exp6-${SFT_TYPE}"

# Training settings
+config_file="configs/accelerate_config_zero3.yaml"
dataset_path="./dataset"
dataset_sh="./mdx/dataset_gpt4_self_inst_ja.sh"
num_train_epochs=5
per_device_train_batch_size=1
-gradient_accumulation_steps=64 # global_batch_size = per_device_train_batch_size * gradient_accumulation_steps = 64
+gradient_accumulation_steps=8 # global_batch_size = per_device_train_batch_size * n_gpus * gradient_accumulation_steps = 64
peft_target_model="llama-all"
max_seq_length=4096 # for llama model
learning_rate="1e-4"
lr_scheduler_type="cosine"
warmup_ratio=0.1

-python train.py \
+# N_gpu = 8
+accelerate launch --config_file $config_file \
+train.py \
--model_name_or_path $TARGET_MODEL \
--tokenizer_name_or_path $TARGET_MODEL \
--use_peft \
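Dropping the export CUDA_VISIBLE_DEVICES default follows from the launch change: accelerate spawns one training process per GPU it can see, so a single-device default would silently shrink the run back to one GPU. If a run ever needs to be pinned to an explicit device set, the variable can still be exported once for the whole launch. A hedged, illustrative fragment (not part of the commit; the remaining train.py flags are truncated here, as they are in the diff):

# Pin the accelerate launch to an explicit GPU set (optional).
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7  # e.g. all eight GPUs of the node

TARGET_MODEL="/model/13B_HF/llm-jp-13b-cc-v2-63500step.code10K_en20K_ja30K_ver2.2"  # exp6 path from the diff
accelerate launch --config_file configs/accelerate_config_zero3.yaml \
    train.py \
    --model_name_or_path "$TARGET_MODEL" \
    --tokenizer_name_or_path "$TARGET_MODEL" \
    --use_peft
# (the remaining train.py flags are unchanged from the script and not shown in this diff)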
14 changes: 7 additions & 7 deletions sft-scripts/exp8_gpt4-self-inst_lora_all.sh
@@ -1,7 +1,7 @@
#!/bin/bash

-# Single GPU sft script
-# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_single_gpu.sh
+# Single Node sft script
+# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_multi_gpu.sh

set -eux

@@ -10,9 +10,6 @@ SCRIPT_PATH="llm-jp-sft/"
cd $SCRIPT_PATH
source venv/bin/activate

-# Set CUDA Device (default: 0)
-export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

# Target Model (TODO: Change if needed)
TARGET_MODEL="/model/13B_HF/llm-jp-13b-cc-v2-beta-61000step.code20K_en40K_ja60K_ver2.2/" # exp8
SFT_TYPE="lora-all-gpt4-self-inst"
@@ -24,18 +21,21 @@ export WANDB_PROJECT="13b-sft-and-eval"
export WANDB_NAME="sft-exp8-${SFT_TYPE}"

# Training settings
+config_file="configs/accelerate_config_zero3.yaml"
dataset_path="./dataset"
dataset_sh="./mdx/dataset_gpt4_self_inst_ja.sh"
num_train_epochs=5
per_device_train_batch_size=1
-gradient_accumulation_steps=64 # global_batch_size = per_device_train_batch_size * gradient_accumulation_steps = 64
+gradient_accumulation_steps=8 # global_batch_size = per_device_train_batch_size * n_gpus * gradient_accumulation_steps = 64
peft_target_model="llama-all"
max_seq_length=4096 # for llama model
learning_rate="2e-5"
lr_scheduler_type="cosine"
warmup_ratio=0.1

-python train.py \
+# N_gpu = 8
+accelerate launch --config_file $config_file \
+train.py \
--model_name_or_path $TARGET_MODEL \
--tokenizer_name_or_path $TARGET_MODEL \
--use_peft \
14 changes: 7 additions & 7 deletions sft-scripts/exp8_gpt4-self-inst_lora_all_lr_1e-4.sh
@@ -1,7 +1,7 @@
#!/bin/bash

-# Single GPU sft script
-# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_single_gpu.sh
+# Single Node sft script
+# based on https://github.com/llm-jp/llm-jp-sft/blob/main/mdx/train_peft_multi_gpu.sh

set -eux

@@ -10,9 +10,6 @@ SCRIPT_PATH="llm-jp-sft/"
cd $SCRIPT_PATH
source venv/bin/activate

-# Set CUDA Device (default: 0)
-export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

# Target Model (TODO: Change if needed)
TARGET_MODEL="/model/13B_HF/llm-jp-13b-cc-v2-beta-61000step.code20K_en40K_ja60K_ver2.2/" # exp8
SFT_TYPE="lora-all-gpt4-self-inst-lr_1e-4"
@@ -24,18 +21,21 @@ export WANDB_PROJECT="13b-sft-and-eval"
export WANDB_NAME="sft-exp8-${SFT_TYPE}"

# Training settings
+config_file="configs/accelerate_config_zero3.yaml"
dataset_path="./dataset"
dataset_sh="./mdx/dataset_gpt4_self_inst_ja.sh"
num_train_epochs=5
per_device_train_batch_size=1
-gradient_accumulation_steps=64 # global_batch_size = per_device_train_batch_size * gradient_accumulation_steps = 64
+gradient_accumulation_steps=8 # global_batch_size = per_device_train_batch_size * n_gpus * gradient_accumulation_steps = 64
peft_target_model="llama-all"
max_seq_length=4096 # for llama model
learning_rate="1e-4"
lr_scheduler_type="cosine"
warmup_ratio=0.1

-python train.py \
+# N_gpu = 8
+accelerate launch --config_file $config_file \
+train.py \
--model_name_or_path $TARGET_MODEL \
--tokenizer_name_or_path $TARGET_MODEL \
--use_peft \
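Taken together, the four updated scripts form a 2×2 experiment grid: two base checkpoints (exp6 and exp8) crossed with two learning rates (2e-5 and 1e-4), with the SFT settings otherwise identical. An illustrative overview sketch (the loop is only a summary aid, not part of the commit; model paths and learning rates are copied from the diffs above):

#!/bin/bash
# Overview of the four SFT runs touched by this commit.
declare -A MODELS=(
  [exp6]="/model/13B_HF/llm-jp-13b-cc-v2-63500step.code10K_en20K_ja30K_ver2.2"
  [exp8]="/model/13B_HF/llm-jp-13b-cc-v2-beta-61000step.code20K_en40K_ja60K_ver2.2/"
)
for exp in exp6 exp8; do
  for lr in "2e-5" "1e-4"; do
    echo "${exp}: TARGET_MODEL=${MODELS[$exp]} learning_rate=${lr}"
  done
done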
