Skip to content

Commit

Permalink
fix: add final details to push model to S3
Browse files Browse the repository at this point in the history
Signed-off-by: Sébastien Han <[email protected]>
  • Loading branch information
leseb committed Oct 14, 2024
1 parent b3dd5a4 commit 424f7aa
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 15 deletions.
4 changes: 2 additions & 2 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ deploymentSpec:
\ if gpu_available\n else \"No GPU available\"\n )\n \
\ gpu_count = torch.cuda.device_count() if gpu_available else 0\n\n \
\ print(f\"GPU Available: {gpu_available}, Using: {gpu_name}\")\n\n #\
\ MMLU_BRANCH\n\n # This is very specific to `ilab generate`, necessary\
\ MMLU_BRANCH\n\n # This is very specific to 'ilab generate', necessary\
\ because the data generation and\n # model evaluation are taking place\
\ in separate environments.\n def update_test_lines_in_files(base_dir):\n\
\ import os\n import re\n\n # Define the regex to match\
Expand All @@ -1129,7 +1129,7 @@ deploymentSpec:
\ = []\n regex = re.compile(pattern)\n\n for root, dirs, files\
\ in os.walk(base_dir):\n for directory in dirs:\n \
\ if regex.search(directory):\n matching_dirs.append(os.path.join(root,\
\ directory))\n\n # From `ilab sdg` the knowledge_*_task.yaml files\
\ directory))\n\n # From 'ilab sdg' the knowledge_*_task.yaml files\
\ have a line that references where the SDG took place.\n # This\
\ needs to be updated to run elsewhere.\n # The line is:\n \
\ # test: /path/to/where/sdg/occurred/node_datasets_*\n # TODO:\
Expand Down
21 changes: 13 additions & 8 deletions standalone/standalone.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@
MT_BENCH_OUTPUT_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-results.txt")
MT_BENCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-best.txt")
MT_BENCH_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-branch-best.txt")
MMLU_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mmlu-branch-best.txt")
CANDIDATE_MODEL_PATH = path.join(
DATA_PVC_MOUNT_PATH, "model/output/hf_format/candidate_model"
)
SDG_OBJECT_STORE_SECRET_NAME = "sdg-object-store-credentials"
KFP_MODEL_SERVER_CM = """
# TODO: remove the following line and replace it with the actual ConfigMap/Secret
Expand Down Expand Up @@ -268,7 +272,10 @@
echo "Final data tarball path: $FINAL_DATA_TAR_PATH"
echo "Final data tarball file: $FINAL_DATA_TAR_FILE"
echo "Archiving data before pushing to the object store"
tar --create --gzip --verbose --file "$FINAL_DATA_TAR_PATH" {mt_bench_output_path} {mt_bench_scores_path} {mt_bench_branch_scores_path} {data_pvc_mount_path}/model
tar --create \
--gzip \
--verbose \
--file "$FINAL_DATA_TAR_PATH" {mt_bench_output_path} {mt_bench_scores_path} {mt_bench_branch_scores_path} {mmlu_branch_scores_path} {candidate_model_path}
# TODO: change model path for the final model!!!
fi
Expand Down Expand Up @@ -843,11 +850,7 @@ def run(
scores = json.loads(scores)
logger.info("Best model: %s", scores.get("best_model"))
ctx.obj["candidate_model"] = scores.get("best_model")

# Push the best model to S3
# TODO
logger.info("instructLab Training Finished!")
return 0

# Push the best model to S3
ctx.invoke(upload_trained_model)
Expand Down Expand Up @@ -1271,6 +1274,8 @@ def data_processing(train_args: TrainingArgs) -> None:
mt_bench_output_path=MT_BENCH_OUTPUT_PATH,
mt_bench_scores_path=MT_BENCH_SCORES_PATH,
mt_bench_branch_scores_path=MT_BENCH_BRANCH_SCORES_PATH,
mmlu_branch_scores_path=MMLU_BRANCH_SCORES_PATH,
candidate_model=CANDIDATE_MODEL_PATH,
)
],
volume_mounts=get_vol_mount(),
Expand Down Expand Up @@ -1845,7 +1850,7 @@ def branch_eval_summary_to_json(
# MMLU_BRANCH
# This is very specific to `ilab generate`, necessary because the data generation and
# This is very specific to 'ilab generate', necessary because the data generation and
# model evaluation are taking place in separate environments.
def update_test_lines_in_files(base_dir):
import os
Expand Down Expand Up @@ -1891,7 +1896,7 @@ def find_node_dataset_directories(base_dir: str):
if regex.search(directory):
matching_dirs.append(os.path.join(root, directory))
# From `ilab sdg` the knowledge_*_task.yaml files have a line that references where the SDG took place.
# From 'ilab sdg' the knowledge_*_task.yaml files have a line that references where the SDG took place.
# This needs to be updated to run elsewhere.
# The line is:
# test: /path/to/where/sdg/occurred/node_datasets_*
Expand Down Expand Up @@ -2119,7 +2124,7 @@ def find_node_dataset_directories(base_dir: str):
json.dump(mt_bench_branch_data, f, indent=4)
"""
exec_run_final_eval_op_args = """
run_final_eval_op(candidate_model='/data/model/output/hf_format/candidate_model', taxonomy='/data/taxonomy', tasks='/data/generated', base_branch='', candidate_branch='', device=None, base_model_dir='/model/model', max_workers='auto', merge_system_user_message=False, model_dtype='bfloat16', few_shots=5, batch_size=8)
run_final_eval_op(mmlu_branch_output='/data/mmlu-branch-best.txt',mt_bench_branch_output='/data/mt-bench-branch-best.txt',candidate_model='/data/model/output/hf_format/candidate_model', taxonomy='/data/taxonomy', tasks='/data/generate', base_branch='', candidate_branch='', device=None, base_model_dir='/data/model', max_workers='auto', merge_system_user_message=False, model_dtype='bfloat16', few_shots=5, batch_size=8)
"""

if eval_type == "mt-bench":
Expand Down
13 changes: 8 additions & 5 deletions standalone/standalone.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ PYTORCH_NNODES = 2
MT_BENCH_OUTPUT_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-results.txt")
MT_BENCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-best.txt")
MT_BENCH_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mt-bench-branch-best.txt")
MMLU_BRANCH_SCORES_PATH = path.join(DATA_PVC_MOUNT_PATH, "mmlu-branch-best.txt")
CANDIDATE_MODEL_PATH = path.join(DATA_PVC_OUTPUT_PATH, "hf_format/candidate_model")
SDG_OBJECT_STORE_SECRET_NAME = "sdg-object-store-credentials"
KFP_MODEL_SERVER_CM = """
# TODO: remove the following line and replace it with the actual ConfigMap/Secret
Expand Down Expand Up @@ -253,7 +255,10 @@ if [ "$STRATEGY" == "upload" ]; then
echo "Final data tarball path: $FINAL_DATA_TAR_PATH"
echo "Final data tarball file: $FINAL_DATA_TAR_FILE"
echo "Archiving data before pushing to the object store"
tar --create --gzip --verbose --file "$FINAL_DATA_TAR_PATH" {mt_bench_output_path} {mt_bench_scores_path} {mt_bench_branch_scores_path} {data_pvc_mount_path}/model
tar --create \
--gzip \
--verbose \
--file "$FINAL_DATA_TAR_PATH" {mt_bench_output_path} {mt_bench_scores_path} {mt_bench_branch_scores_path} {mmlu_branch_scores_path} {candidate_model_path}
# TODO: change model path for the final model!!!
fi
Expand Down Expand Up @@ -828,11 +833,7 @@ def run(
scores = json.loads(scores)
logger.info("Best model: %s", scores.get("best_model"))
ctx.obj["candidate_model"] = scores.get("best_model")

# Push the best model to S3
# TODO
logger.info("instructLab Training Finished!")
return 0

# Push the best model to S3
ctx.invoke(upload_trained_model)
Expand Down Expand Up @@ -1083,6 +1084,8 @@ def create_data_job(
mt_bench_output_path=MT_BENCH_OUTPUT_PATH,
mt_bench_scores_path=MT_BENCH_SCORES_PATH,
mt_bench_branch_scores_path=MT_BENCH_BRANCH_SCORES_PATH,
mmlu_branch_scores_path=MMLU_BRANCH_SCORES_PATH,
candidate_model=CANDIDATE_MODEL_PATH,
)
],
volume_mounts=get_vol_mount(),
Expand Down

0 comments on commit 424f7aa

Please sign in to comment.