Skip to content

Commit 1139d9b

Browse files
Merge pull request #107 from sallyom/small-fix-syntax
update data_dir for training in standalone.py, update data_processing_op_args in pipeline.py
2 parents: d6abdf2 + 08bcb4c; commit: 1139d9b

File tree

3 files changed

+14
-12
lines changed

3 files changed

+14
-12
lines changed

pipeline.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -442,11 +442,11 @@ def gen_standalone():
442442

443443
# The list of executor names to extract details from to generate the standalone script
444444
executors = {
445-
"exec-data-processing-op": 'data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_SDG_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")',
445+
"exec-data-processing-op": 'data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_MODEL_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")',
446446
"exec-sdg-op": 'sdg_op(num_instructions_to_generate={num_instructions_to_generate}, repo_branch="{exec_git_clone_op_repo_branch}", repo_pr={exec_git_clone_op_repo_pr}, taxonomy="{TAXONOMY_DATA_PATH}", sdg="{SDG_GENERATED_DATA_PATH}")',
447447
"exec-git-clone-op": {},
448448
"exec-huggingface-importer-op": 'huggingface_importer_op(repo_name="{REPO_GRANITE_7B_IMAGE}", model="{DATA_PVC_MODEL_PATH}")',
449-
"exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}", models_folder="{CANDIDATE_MODEL_PATH}", models_path_prefix="{CANDIDATE_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})',
449+
"exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})',
450450
"exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_OUTPUT_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy="{TAXONOMY_PATH}", tasks="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size={BATCH_SIZE})',
451451
}
452452

standalone/standalone.py

+7 -6
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,10 @@
7575
MT_BENCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-best.txt")
7676
MT_BENCH_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-branch-best.txt")
7777
MMLU_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mmlu-branch-best.txt")
78-
CANDIDATE_MODEL_PATH = path.join(
79-
DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format/candidate_model"
78+
CANDIDATE_MODEL_PATH_PREFIX = path.join(
79+
DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format"
8080
)
81+
CANDIDATE_MODEL_PATH = path.join(CANDIDATE_MODEL_PATH_PREFIX, "candidate_model")
8182
SDG_GENERATED_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "generated")
8283
TAXONOMY_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "taxonomy")
8384
# MMLU_SCORES_PATH = "/output/mmlu-results.txt" - after training phase 1 is done MMLU is not performed anymore
@@ -149,7 +150,7 @@
149150
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
150151
-m instructlab.training.main_ds \
151152
--model_name_or_path="$PATH_TO_MODEL" \
152-
--data_path=/data/processed_data/data.jsonl \
153+
--data_path=/data/data/processed_data/data.jsonl \
153154
--output_dir={path_to_model}/output/phase_{phase_num} \
154155
--num_epochs={epoch_num} \
155156
--effective_batch_size=3840 \
@@ -221,7 +222,7 @@
221222
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
222223
-m instructlab.training.main_ds \
223224
--model_name_or_path="$PATH_TO_MODEL" \
224-
--data_path=/data/processed_data/data.jsonl \
225+
--data_path=/data/data/processed_data/data.jsonl \
225226
--output_dir="$tmp_model" \
226227
--num_epochs={epoch_num} \
227228
--effective_batch_size=3840 \
@@ -1166,7 +1167,7 @@ def data_processing(train_args: TrainingArgs) -> None:
11661167
data_processing(train_args=training_args)
11671168
"""
11681169
exec_data_processing_op_args = f"""
1169-
data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_SDG_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")
1170+
data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_MODEL_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")
11701171
"""
11711172

11721173
init_containers = [
@@ -1776,7 +1777,7 @@ def stop_vllm():
17761777
return outputs(best_model=best_model, best_score=best_score)
17771778
"""
17781779
exec_run_mt_bench_op_args = f"""
1779-
run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}", models_folder="{CANDIDATE_MODEL_PATH}", models_path_prefix="{CANDIDATE_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})
1780+
run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})
17801781
"""
17811782
exec_run_final_eval_op_command = """
17821783
from typing import *

standalone/standalone.tpl

+5 -4
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,10 @@ MT_BENCH_OUTPUT_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-results.txt")
7575
MT_BENCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-best.txt")
7676
MT_BENCH_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-branch-best.txt")
7777
MMLU_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mmlu-branch-best.txt")
78-
CANDIDATE_MODEL_PATH = path.join(
79-
DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format/candidate_model"
78+
CANDIDATE_MODEL_PATH_PREFIX = path.join(
79+
DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format"
8080
)
81+
CANDIDATE_MODEL_PATH = path.join(CANDIDATE_MODEL_PATH_PREFIX, "candidate_model")
8182
SDG_GENERATED_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "generated")
8283
TAXONOMY_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "taxonomy")
8384
# MMLU_SCORES_PATH = "/output/mmlu-results.txt" - after training phase 1 is done MMLU is not performed anymore
@@ -134,7 +135,7 @@ spec:
134135
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
135136
-m instructlab.training.main_ds \
136137
--model_name_or_path="$PATH_TO_MODEL" \
137-
--data_path=/data/processed_data/data.jsonl \
138+
--data_path=/data/data/processed_data/data.jsonl \
138139
--output_dir={path_to_model}/output/phase_{phase_num} \
139140
--num_epochs={epoch_num} \
140141
--effective_batch_size=3840 \
@@ -206,7 +207,7 @@ spec:
206207
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
207208
-m instructlab.training.main_ds \
208209
--model_name_or_path="$PATH_TO_MODEL" \
209-
--data_path=/data/processed_data/data.jsonl \
210+
--data_path=/data/data/processed_data/data.jsonl \
210211
--output_dir="$tmp_model" \
211212
--num_epochs={epoch_num} \
212213
--effective_batch_size=3840 \

0 commit comments

Comments
 (0)