|
75 | 75 | MT_BENCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-best.txt")
|
76 | 76 | MT_BENCH_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-branch-best.txt")
|
77 | 77 | MMLU_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mmlu-branch-best.txt")
|
78 |
| -CANDIDATE_MODEL_PATH = path.join( |
79 |
| - DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format/candidate_model" |
| 78 | +CANDIDATE_MODEL_PATH_PREFIX = path.join( |
| 79 | + DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format" |
80 | 80 | )
|
| 81 | +CANDIDATE_MODEL_PATH = path.join(CANDIDATE_MODEL_PATH_PREFIX, "candidate_model") |
81 | 82 | SDG_GENERATED_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "generated")
|
82 | 83 | TAXONOMY_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "taxonomy")
|
83 | 84 | # MMLU_SCORES_PATH = "/output/mmlu-results.txt" - after training phase 1 is done MMLU is not performed anymore
|
|
149 | 150 | --rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
|
150 | 151 | -m instructlab.training.main_ds \
|
151 | 152 | --model_name_or_path="$PATH_TO_MODEL" \
|
152 |
| - --data_path=/data/processed_data/data.jsonl \ |
| 153 | + --data_path=/data/data/processed_data/data.jsonl \ |
153 | 154 | --output_dir={path_to_model}/output/phase_{phase_num} \
|
154 | 155 | --num_epochs={epoch_num} \
|
155 | 156 | --effective_batch_size=3840 \
|
|
221 | 222 | --rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
|
222 | 223 | -m instructlab.training.main_ds \
|
223 | 224 | --model_name_or_path="$PATH_TO_MODEL" \
|
224 |
| - --data_path=/data/processed_data/data.jsonl \ |
| 225 | + --data_path=/data/data/processed_data/data.jsonl \ |
225 | 226 | --output_dir="$tmp_model" \
|
226 | 227 | --num_epochs={epoch_num} \
|
227 | 228 | --effective_batch_size=3840 \
|
@@ -1166,7 +1167,7 @@ def data_processing(train_args: TrainingArgs) -> None:
|
1166 | 1167 | data_processing(train_args=training_args)
|
1167 | 1168 | """
|
1168 | 1169 | exec_data_processing_op_args = f"""
|
1169 |
| -data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_SDG_PATH}", processed_data="{PREPROCESSED_DATA_PATH}") |
| 1170 | +data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_MODEL_PATH}", processed_data="{PREPROCESSED_DATA_PATH}") |
1170 | 1171 | """
|
1171 | 1172 |
|
1172 | 1173 | init_containers = [
|
@@ -1776,7 +1777,7 @@ def stop_vllm():
|
1776 | 1777 | return outputs(best_model=best_model, best_score=best_score)
|
1777 | 1778 | """
|
1778 | 1779 | exec_run_mt_bench_op_args = f"""
|
1779 |
| -run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}", models_folder="{CANDIDATE_MODEL_PATH}", models_path_prefix="{CANDIDATE_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}) |
| 1780 | +run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}) |
1780 | 1781 | """
|
1781 | 1782 | exec_run_final_eval_op_command = """
|
1782 | 1783 | from typing import *
|
|
0 commit comments