#!/bin/bash
#$ -l rt_AF=2
- #$ -l h_rt=1:00:00:00
+ #$ -l h_rt=0:01:00:00
#$ -j y
- #$ -o outputs/instruction/swallow-7b/
+ #$ -o outputs/instruction/Llama-3-8B/
#$ -cwd

# module load
source /etc/profile.d/modules.sh
- module load cuda/11.8/11.8.0
- module load cudnn/8.9/8.9.2
- module load nccl/2.16/2.16.2-1
+ module use /bb/llm/gaf51275/modules/modulefiles
+
+ module load cuda/12.1/12.1.1
+ module load cudnn/cuda-12.1/9.0.0
+ module load nccl/2.20.5
module load hpcx/2.12
+ module load gcc/11.4.0

# switch virtual env
source .env/bin/activate
@@ -44,54 +47,52 @@ while read -r line; do
done < "$SGE_JOB_HOSTLIST" > "$HOSTFILE_NAME"

# training config
- SEQ_LENGTH=4096
+ SEQ_LENGTH=8192
DATA_PARALLEL_SIZE=$NUM_GPUS

- MICRO_BATCH_SIZE=4
- GLOBAL_BATCH_SIZE=64
+ MICRO_BATCH_SIZE=1
+ GLOBAL_BATCH_SIZE=128

# optimizer config
- LR=2e-5
- MIN_LR=2e-6
+ LR=1e-5
+ MIN_LR=1e-6

WEIGHT_DECAY=0.1
GRAD_CLIP=1

- # checkpoint & tokenizer
- TOKENIZER_MODEL=/bb/llm/gaf51275/llama/huggingface-checkpoint/Swallow-7b-hf/tokenizer.model
- CHECKPOINT_DIR=/bb/llm/gaf51275/llama/huggingface-checkpoint/Swallow-7b-hf
- CHECKPOINT_SAVE_DIR="/bb/llm/gaf51275/llama/checkpoints/Swallow-7b-VE-chat/baseline-lr_${LR}-minlr_${MIN_LR}"
+ # checkpoint
+ TOKENIZER_DIR=/groups/gag51395/hf-checkpoints/Meta-Llama-3-8B-Instruct
+ CHECKPOINT_DIR=/groups/gag51395/hf-checkpoints/Meta-Llama-3-8B-Instruct
+ CHECKPOINT_SAVE_DIR="/bb/llm/gaf51275/2024/checkpoints/Llama-3-8B-Instruct-v0.2/LR_${LR}_MINLR_${MIN_LR}_WD_${WEIGHT_DECAY}_GC_${GRAD_CLIP}"

mkdir -p ${CHECKPOINT_SAVE_DIR}

# dataset
- DATASET_DIR=/bb/llm/gaf51275/llama/finetuning/datasets/training/baseline
+ DATASET_DIR=/groups/gag51395/datasets/instruction/2023-swallow/training/baseline

TRAIN_DATA_PATH=${DATASET_DIR}/train.jsonl
- VALID_DATA_PATH=${DATASET_DIR}/val.jsonl
+ VALID_DATA_PATH=${DATASET_DIR}/train.jsonl

# job name
- JOB_NAME="Swallow-7b-VE-baseline-BS=${GLOBAL_BATCH_SIZE}-LR=${LR}-MINLR=${MIN_LR}"
+ JOB_NAME="Llama-3-8B-instruct-v0.2-BS=${GLOBAL_BATCH_SIZE}-LR=${LR}-MINLR=${MIN_LR}-WD=${WEIGHT_DECAY}-GC=${GRAD_CLIP}"

# run
mpirun -np $NUM_GPUS \
--npernode $NUM_GPU_PER_NODE \
-hostfile $HOSTFILE_NAME \
-x MASTER_ADDR=$MASTER_ADDR \
-x MASTER_PORT=$MASTER_PORT \
- -bind-to none -map-by slot \
+ -bind-to none \
+ -x PATH \
+ -x LD_LIBRARY_PATH \
-x PATH \
python examples/finetuning.py \
--seq-length ${SEQ_LENGTH} \
- --sliding-window-size ${SEQ_LENGTH} \
--micro-batch-size ${MICRO_BATCH_SIZE} \
--global-batch-size ${GLOBAL_BATCH_SIZE} \
- --hf-transformer-model-dir ${CHECKPOINT_DIR} \
- --tokenizer-type Llama2Tokenizer \
- --tokenizer-model ${TOKENIZER_MODEL} \
+ --hf-transformer-model-dir ${TOKENIZER_DIR} \
--instruction-train-data-path ${TRAIN_DATA_PATH} \
--instruction-valid-data-path ${VALID_DATA_PATH} \
- --epoch 2 \
- --train-iters 500000 \
+ --epoch 1 \
--lr ${LR} \
--min-lr ${MIN_LR} \
--lr-decay-style cosine \
@@ -100,10 +101,10 @@ mpirun -np $NUM_GPUS \
--optimizer adam \
--adam-beta1 0.9 \
--adam-beta2 0.95 \
- --adam-eps 1e-6 \
+ --adam-eps 1e-8 \
--save-interval 500 \
- --eval-interval 100 \
- --eval-iters 20 \
+ --eval-interval 500 \
+ --eval-iters 10 \
--bf16 \
--mixed-precision \
--base-model ${CHECKPOINT_DIR} \
@@ -116,6 +117,6 @@ mpirun -np $NUM_GPUS \
--instruction-tuning \
--save-sampler-state \
--use-mpi \
- --wandb-entity "prj-jalm" \
- --wandb-project "Llama-2-7b-instruct" \
+ --wandb-entity "okoge" \
+ --wandb-project "llm-recipes" \
--wandb-name "${JOB_NAME}"
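
A quick note on the new batch settings, since the script never spells it out: with MICRO_BATCH_SIZE=1 and GLOBAL_BATCH_SIZE=128 the trainer has to close the gap with gradient accumulation. The sketch below is only a back-of-the-envelope check, assuming 8 GPUs per rt_AF node (so 16 GPUs for rt_AF=2) and assuming the trainer derives accumulation steps as global batch / (micro batch x data-parallel size); neither detail is stated in this diff.

# Back-of-the-envelope check, not part of the job script.
# NUM_GPU_PER_NODE=8 is an assumption about the rt_AF node type.
NUM_NODES=2
NUM_GPU_PER_NODE=8
NUM_GPUS=$((NUM_NODES * NUM_GPU_PER_NODE))      # data-parallel size: 16

MICRO_BATCH_SIZE=1
GLOBAL_BATCH_SIZE=128

# Assumed derivation: accumulation steps = global / (micro * data-parallel)
if (( GLOBAL_BATCH_SIZE % (MICRO_BATCH_SIZE * NUM_GPUS) != 0 )); then
    echo "GLOBAL_BATCH_SIZE must be divisible by MICRO_BATCH_SIZE * NUM_GPUS" >&2
    exit 1
fi
ACC_STEPS=$((GLOBAL_BATCH_SIZE / (MICRO_BATCH_SIZE * NUM_GPUS)))
echo "gradient accumulation steps: ${ACC_STEPS}"   # -> 8

Keeping GLOBAL_BATCH_SIZE an exact multiple of MICRO_BATCH_SIZE x NUM_GPUS avoids any rounding in that split.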