Merge branch 'maanug/refactor-metric-check' into 'main'

Move metrics pytest into individual jobs

See merge request ADLR/megatron-lm!1257
TrustLLMeu · Mar 25, 2024 · 7d40bd3 · 7d40bd3
2 parents 3a70d14 + 212ce8d
commit 7d40bd3
Show file tree

Hide file tree

Showing 13 changed files with 94 additions and 33 deletions.
diff --git a/jet-tests.yml b/jet-tests.yml
@@ -63,7 +63,7 @@ jet-trigger:
     JET_WORKLOADS_FILTER: "$_JET_FILTER"
 
 
-jet-functional-results:
+jet-results-summary:
   stage: jet
   image: gitlab-master.nvidia.com:5005/dl/jet/api:latest
   tags:
@@ -80,15 +80,3 @@ jet-functional-results:
     - if: $JET_CUSTOM_FILTER != "" && $CI_PIPELINE_SOURCE != 'merge_request_event'
       when: always
     - when: never
-
-jet-compare-metrics:
-  extends: .jet_common
-  image: gitlab-master.nvidia.com:5005/dl/jet/api:latest
-  tags:
-    - os/linux
-  needs: [ jet-functional-results ]
-  before_script:
-    - jet secrets jwt-login jwt/nvidia/gitlab-master adlr-megatron-lm-ci $CI_JOB_JWT
-  script:
-    - python -m pip install -U --no-cache-dir pytest tensorboard
-    - python tests/functional_tests/python_test_utils/jet_test_pipeline.py ${CI_PIPELINE_ID} --test metrics
diff --git a/tests/functional_tests/jet_recipes/MR-bert.yaml b/tests/functional_tests/jet_recipes/MR-bert.yaml
@@ -45,9 +45,8 @@ spec:
         MBS={micro_batch_size} \
         GBS={batch_size} \
         CHECKPOINT_RESUME_TEST={checkpoint_resume_test} \
-        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'} && \
-        python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py {assets_dir} "" | \
-        tee {assets_dir}/results.json
+        JOB_NAME={key.split("/")[1]} \
+        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'}
 products:
   # MCore
   - {tp_size: [2], pp_size: [2]}

diff --git a/tests/functional_tests/jet_recipes/MR-gpt.yaml b/tests/functional_tests/jet_recipes/MR-gpt.yaml
@@ -51,9 +51,8 @@ spec:
         MOE_GROUPED_GEMM={moe_grouped_gemm} \
         CKPT_FORMAT={ckpt_format} \
         CHECKPOINT_RESUME_TEST={checkpoint_resume_test} \
-        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'} && \
-        python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py {assets_dir} "" | \
-        tee {assets_dir}/results.json
+        JOB_NAME={key.split("/")[1]} \
+        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'}
 products:
   # MCore
   - {tp_size: [2], pp_size: [2]}

diff --git a/tests/functional_tests/jet_recipes/MR-t5.yaml b/tests/functional_tests/jet_recipes/MR-t5.yaml
@@ -43,8 +43,7 @@ spec:
         VP_SIZE={vp_size if vp_size is not None else '""'} \
         MBS={micro_batch_size} \
         GBS={batch_size} \
-        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'} && \
-        python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py {assets_dir} "" | \
-        tee {assets_dir}/results.json
+        JOB_NAME={key.split("/")[1]} \
+        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'}
 products:
   - {use_te: [True], tp_size: [1],  pp_size: [1], vp_size: [1]}
diff --git a/tests/functional_tests/jet_recipes/monthly-t5.yaml b/tests/functional_tests/jet_recipes/monthly-t5.yaml
@@ -45,9 +45,8 @@ spec:
         MBS={micro_batch_size} \
         GBS={batch_size} \
         CHECKPOINT_RESUME_TEST={checkpoint_resume_test} \
-        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'} && \
-        python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py {assets_dir} "" | \
-        tee {assets_dir}/results.json
+        JOB_NAME={key.split("/")[1]} \
+        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'}
 products:
   - { tp_size: [1,2], pp_size: [1], vp_size: [1] }
   - {use_te: [True], tp_size: [2], pp_size: [1], vp_size: [1]}

diff --git a/tests/functional_tests/jet_recipes/nightly-bert.yaml b/tests/functional_tests/jet_recipes/nightly-bert.yaml
@@ -43,9 +43,8 @@ spec:
         VP_SIZE={vp_size if vp_size is not None else '""'} \
         MBS={micro_batch_size} \
         GBS={batch_size} \
-        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'} && \
-        python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py {assets_dir} "" | \
-        tee {assets_dir}/results.json
+        JOB_NAME={key.split("/")[1]} \
+        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'}
 products:
   - {tp_size: [1], pp_size: [4], vp_size: [2]}
   - {use_mcore: [True, False], tp_size: [4], pp_size: [1]}

diff --git a/tests/functional_tests/jet_recipes/nightly-gpt.yaml b/tests/functional_tests/jet_recipes/nightly-gpt.yaml
@@ -47,9 +47,8 @@ spec:
         MBS={micro_batch_size} \
         GBS={batch_size} \
         MOE_GROUPED_GEMM={moe_grouped_gemm} \
-        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'} && \
-        python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py {assets_dir} "" | \
-        tee {assets_dir}/results.json
+        JOB_NAME={key.split("/")[1]} \
+        ADDITIONAL_PARAMS={extra_args if extra_args is not None else '""'}
 products:
   - {use_mcore: [True, False], tp_size: [4], pp_size: [1]}
   - {use_mcore: [True, False], tp_size: [1], pp_size: [2,4]}

diff --git a/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py b/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py
@@ -59,9 +59,8 @@ def collect_train_test_metrics(logs_dir, run_name):
         },
         "iteration_timing_avg": iteration_time_avg,
     }
-    model_name = run_name.split('_')[0]
     str_train_metrics = str(train_metrics).replace("'", "\"")
-    print(f"\n ----------- Store the following metrics in tests/functional_tests/test_results/${model_name}/{run_name}.json ----------")
+    print(f"\n ----------- Store the following metrics in tests/functional_tests/test_results/jet/{run_name}.json ----------")
     print(f"\n {str_train_metrics}", flush=True)
 
 if __name__ == '__main__':

diff --git a/tests/functional_tests/python_test_utils/jet_test_pipeline.py b/tests/functional_tests/python_test_utils/jet_test_pipeline.py
@@ -44,10 +44,18 @@ def check_exitcodes(results):
     exit_codes = []
     log_urls = []
     names = []
+    metrics_file_urls = []
     for result in results:
         exit_codes.append(result.get('l_exit_code', -1))
         log_urls.append(select_asset(result, 'output_script-0.log'))
         names.append(result['obj_workload']['s_key'].split('basic/')[-1])
+        metrics_file_urls.append(select_asset(result, 'results.json'))
+
+    metrics_table = PrettyTable()
+    metrics_table.add_column("Job Key", names)
+    metrics_table.add_column("Results Data", metrics_file_urls)
+    metrics_table.align["Job Key"] = 'l'
+    print(metrics_table)
 
     table = PrettyTable()
     table.add_column("Job Key", names)

diff --git a/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh b/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh
@@ -103,3 +103,21 @@ echo "--------------------------------------------------------------------------
 
 echo "$command" > $SCRIPTS_DIR/pretrain_bert_distributed_command.sh
 eval $command
+
+echo "Saving test results to $TENSORBOARD_DIR"
+python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py $TENSORBOARD_DIR "$JOB_NAME" | \
+    tee ${TENSORBOARD_DIR}/results.json
+
+if [[ $SKIP_PYTEST != 1 ]]; then
+    echo "-----------------------------------------------------------------------------"
+    if [[ $CHECKPOINT_RESUME_TEST -eq 1 ]]; then
+        echo "Running pytest 1st vs 2nd run comparison"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py
+    else
+        echo "Running pytest checks against golden values"
+        export EXPECTED_METRICS_FILE="./tests/functional_tests/test_results/jet/${JOB_NAME}.json"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_ci_pipeline.py
+    fi
+fi
diff --git a/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh b/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh
@@ -161,3 +161,21 @@ echo "--------------------------------------------------------------------------
 
 echo "$command" > $SCRIPTS_DIR/pretrain_gpt3_distributed_command.sh
 eval $command
+
+echo "Saving test results to $TENSORBOARD_DIR"
+python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py $TENSORBOARD_DIR "$JOB_NAME" | \
+    tee ${TENSORBOARD_DIR}/results.json
+
+if [[ $SKIP_PYTEST != 1 ]]; then
+    echo "-----------------------------------------------------------------------------"
+    if [[ $CHECKPOINT_RESUME_TEST -eq 1 ]]; then
+        echo "Running pytest 1st vs 2nd run comparison"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py
+    else
+        echo "Running pytest checks against golden values"
+        export EXPECTED_METRICS_FILE="./tests/functional_tests/test_results/jet/${JOB_NAME}.json"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_ci_pipeline.py
+    fi
+fi
diff --git a/tests/functional_tests/test_scripts/retro/pretrain_retro_distributed_test.sh b/tests/functional_tests/test_scripts/retro/pretrain_retro_distributed_test.sh
@@ -148,3 +148,21 @@ pip install faiss-gpu
 
 echo "$command" > $SCRIPTS_DIR/pretrain_retro_distributed_command.sh
 eval $command
+
+echo "Saving test results to $TENSORBOARD_DIR"
+python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py $TENSORBOARD_DIR "$JOB_NAME" | \
+    tee ${TENSORBOARD_DIR}/results.json
+
+if [[ $SKIP_PYTEST != 1 ]]; then
+    echo "-----------------------------------------------------------------------------"
+    if [[ $CHECKPOINT_RESUME_TEST -eq 1 ]]; then
+        echo "Running pytest 1st vs 2nd run comparison"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py
+    else
+        echo "Running pytest checks against golden values"
+        export EXPECTED_METRICS_FILE="./tests/functional_tests/test_results/jet/${JOB_NAME}.json"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_ci_pipeline.py
+    fi
+fi
diff --git a/tests/functional_tests/test_scripts/t5/pretrain_t5_distributed_test.sh b/tests/functional_tests/test_scripts/t5/pretrain_t5_distributed_test.sh
@@ -129,3 +129,21 @@ echo "--------------------------------------------------------------------------
 
 echo "$command" > $SCRIPTS_DIR/pretrain_t5_distributed_command.sh
 eval $command
+
+echo "Saving test results to $TENSORBOARD_DIR"
+python3 ./tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py $TENSORBOARD_DIR "$JOB_NAME" | \
+    tee ${TENSORBOARD_DIR}/results.json
+
+if [[ $SKIP_PYTEST != 1 ]]; then
+    echo "-----------------------------------------------------------------------------"
+    if [[ $CHECKPOINT_RESUME_TEST -eq 1 ]]; then
+        echo "Running pytest 1st vs 2nd run comparison"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py
+    else
+        echo "Running pytest checks against golden values"
+        export EXPECTED_METRICS_FILE="./tests/functional_tests/test_results/jet/${JOB_NAME}.json"
+        export LOGS_DIR=$TENSORBOARD_DIR
+        pytest ./tests/functional_tests/python_test_utils/test_ci_pipeline.py
+    fi
+fi