Skip to content

Commit

Permalink
Merge pull request #107 from sallyom/small-fix-syntax
Browse files (browse the repository at this point in the history)
update data_dir for training in standalone.py, update data_processing_op_args in pipeline.py
  • Loading branch information
MichaelClifford authored Oct 18, 2024
2 parents d6abdf2 + 08bcb4c commit 1139d9b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 12 deletions.
4 changes: 2 additions & 2 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,11 +442,11 @@ def gen_standalone():

# The list of executor names to extract details from to generate the standalone script
executors = {
- "exec-data-processing-op": 'data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_SDG_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")',
+ "exec-data-processing-op": 'data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_MODEL_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")',
"exec-sdg-op": 'sdg_op(num_instructions_to_generate={num_instructions_to_generate}, repo_branch="{exec_git_clone_op_repo_branch}", repo_pr={exec_git_clone_op_repo_pr}, taxonomy="{TAXONOMY_DATA_PATH}", sdg="{SDG_GENERATED_DATA_PATH}")',
"exec-git-clone-op": {},
"exec-huggingface-importer-op": 'huggingface_importer_op(repo_name="{REPO_GRANITE_7B_IMAGE}", model="{DATA_PVC_MODEL_PATH}")',
- "exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}", models_folder="{CANDIDATE_MODEL_PATH}", models_path_prefix="{CANDIDATE_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})',
+ "exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})',
"exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_OUTPUT_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy="{TAXONOMY_PATH}", tasks="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size={BATCH_SIZE})',
}

Expand Down
13 changes: 7 additions & 6 deletions standalone/standalone.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@
MT_BENCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-best.txt")
MT_BENCH_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-branch-best.txt")
MMLU_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mmlu-branch-best.txt")
- CANDIDATE_MODEL_PATH = path.join(
- DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format/candidate_model"
+ CANDIDATE_MODEL_PATH_PREFIX = path.join(
+ DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format"
)
+ CANDIDATE_MODEL_PATH = path.join(CANDIDATE_MODEL_PATH_PREFIX, "candidate_model")
SDG_GENERATED_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "generated")
TAXONOMY_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "taxonomy")
# MMLU_SCORES_PATH = "/output/mmlu-results.txt" - after training phase 1 is done MMLU is not performed anymore
Expand Down Expand Up @@ -149,7 +150,7 @@
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
-m instructlab.training.main_ds \
--model_name_or_path="$PATH_TO_MODEL" \
- --data_path=/data/processed_data/data.jsonl \
+ --data_path=/data/data/processed_data/data.jsonl \
--output_dir={path_to_model}/output/phase_{phase_num} \
--num_epochs={epoch_num} \
--effective_batch_size=3840 \
Expand Down Expand Up @@ -221,7 +222,7 @@
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
-m instructlab.training.main_ds \
--model_name_or_path="$PATH_TO_MODEL" \
- --data_path=/data/processed_data/data.jsonl \
+ --data_path=/data/data/processed_data/data.jsonl \
--output_dir="$tmp_model" \
--num_epochs={epoch_num} \
--effective_batch_size=3840 \
Expand Down Expand Up @@ -1166,7 +1167,7 @@ def data_processing(train_args: TrainingArgs) -> None:
data_processing(train_args=training_args)
"""
exec_data_processing_op_args = f"""
- data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_SDG_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")
+ data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg="{DATA_PVC_SDG_PATH}", model="{DATA_PVC_MODEL_PATH}", processed_data="{PREPROCESSED_DATA_PATH}")
"""

init_containers = [
Expand Down Expand Up @@ -1776,7 +1777,7 @@ def stop_vllm():
return outputs(best_model=best_model, best_score=best_score)
"""
exec_run_mt_bench_op_args = f"""
- run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}", models_folder="{CANDIDATE_MODEL_PATH}", models_path_prefix="{CANDIDATE_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})
+ run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})
"""
exec_run_final_eval_op_command = """
from typing import *
Expand Down
9 changes: 5 additions & 4 deletions standalone/standalone.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@ MT_BENCH_OUTPUT_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-results.txt")
MT_BENCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-best.txt")
MT_BENCH_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-branch-best.txt")
MMLU_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mmlu-branch-best.txt")
- CANDIDATE_MODEL_PATH = path.join(
- DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format/candidate_model"
+ CANDIDATE_MODEL_PATH_PREFIX = path.join(
+ DATA_PVC_MOUNT_PATH, "model/output/phase_2/hf_format"
)
+ CANDIDATE_MODEL_PATH = path.join(CANDIDATE_MODEL_PATH_PREFIX, "candidate_model")
SDG_GENERATED_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "generated")
TAXONOMY_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "taxonomy")
# MMLU_SCORES_PATH = "/output/mmlu-results.txt" - after training phase 1 is done MMLU is not performed anymore
Expand Down Expand Up @@ -134,7 +135,7 @@ spec:
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
-m instructlab.training.main_ds \
--model_name_or_path="$PATH_TO_MODEL" \
- --data_path=/data/processed_data/data.jsonl \
+ --data_path=/data/data/processed_data/data.jsonl \
--output_dir={path_to_model}/output/phase_{phase_num} \
--num_epochs={epoch_num} \
--effective_batch_size=3840 \
Expand Down Expand Up @@ -206,7 +207,7 @@ spec:
--rdzv_endpoint $(MASTER_ADDR):$(MASTER_PORT) \
-m instructlab.training.main_ds \
--model_name_or_path="$PATH_TO_MODEL" \
- --data_path=/data/processed_data/data.jsonl \
+ --data_path=/data/data/processed_data/data.jsonl \
--output_dir="$tmp_model" \
--num_epochs={epoch_num} \
--effective_batch_size=3840 \
Expand Down

0 comments on commit 1139d9b

Please sign in to comment.