-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path2.data_process_train.sh
29 lines (26 loc) · 1.2 KB
/
2.data_process_train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# train_data.json: MajorDataset.json, MinorDataset.json, or both.
#
# Launch training-data preprocessing for the Apollo2-7B_Prodata experiment.
# The Python job is started in the background; its stdout and stderr are
# both captured in a timestamped log file under ./logs/<experiment_name>/.
# NOTE(review): the /path/to/... values look like placeholders that must be
# replaced with real locations before running -- confirm.
experiment_name=Apollo2-7B_Prodata
log_folder="./logs/${experiment_name}"
mkdir -p "${log_folder}"
mkdir -p "./data/${experiment_name}"
# Log name is month-day_hour-minute; a rerun within the same minute
# truncates and reuses the same log file.
log_name="$(date +"%m-%d_%H-%M").log"
python /path/to/src/prepare/data_process_train_prodata_qwen2.py \
  --data_path /path/to/metadata/train/train_data.json \
  --model_path /path/to/Qwen2-7B \
  --wandb_log ./wandb_logs \
  --max_seq_len 4096 \
  --experiment_name "${experiment_name}" \
  --save_path "./data/${experiment_name}/Mixtrain_Prodata" \
  > "${log_folder}/${log_name}" 2>&1 &
##########################################################################
# Launch training-data preprocessing for the Apollo-MoE-0.5B_Prodata
# experiment using the language-family upcycling variant of the script.
# The Python job is started in the background; its stdout and stderr are
# both captured in a timestamped log file under ./logs/<experiment_name>/.
# NOTE(review): the /path/to/... values look like placeholders that must be
# replaced with real locations before running -- confirm.
experiment_name=Apollo-MoE-0.5B_Prodata
log_folder="./logs/${experiment_name}"
mkdir -p "${log_folder}"
mkdir -p "./data/${experiment_name}"
# Log name is month-day_hour-minute; a rerun within the same minute
# truncates and reuses the same log file.
log_name="$(date +"%m-%d_%H-%M").log"
python /path/to/src/variants/upcycling/language_family/data_process_train_prodata_qwen_language_family.py \
  --data_path /path/to/metadata/train/train_data.json \
  --model_path /path/to/Qwen2-0.5B \
  --wandb_log ./wandb_logs \
  --max_seq_len 4096 \
  --experiment_name "${experiment_name}" \
  --save_path "./data/${experiment_name}/Mixtrain_Prodata" \
  > "${log_folder}/${log_name}" 2>&1 &