Add a pipeline step to generate a metrics report
Signed-off-by: Michael Clifford <[email protected]>
MichaelClifford committed Dec 18, 2024
1 parent cd8bce7 commit 31fa421
Showing 4 changed files with 141 additions and 5 deletions.
4 changes: 2 additions & 2 deletions eval/final/__init__.py
@@ -1,5 +1,5 @@
from .components import run_final_eval_op
from .components import generate_metrics_report_op, run_final_eval_op

# from . import faked

__all__ = ["run_final_eval_op"]
__all__ = ["run_final_eval_op", "generate_metrics_report_op"]
37 changes: 35 additions & 2 deletions eval/final/components.py
@@ -1,9 +1,9 @@
# type: ignore
# pylint: disable=no-value-for-parameter,import-outside-toplevel,import-error

from kfp.dsl import Artifact, Output, component
from kfp.dsl import Artifact, Input, Metrics, Output, component

from utils.consts import RHELAI_IMAGE
from utils.consts import PYTHON_IMAGE, RHELAI_IMAGE


@component(base_image=RHELAI_IMAGE, install_kfp_package=False)
@@ -480,3 +480,36 @@ def find_node_dataset_directories(base_dir: str):

with open(mt_bench_branch_output.path, "w", encoding="utf-8") as f:
json.dump(mt_bench_branch_data, f, indent=4)


@component(base_image=PYTHON_IMAGE, install_kfp_package=False)
def generate_metrics_report_op(
mmlu_branch_output: Input[Artifact],
mt_bench_branch_output: Input[Artifact],
mt_bench_output: Input[Artifact],
metrics: Output[Metrics],
):
import ast
import json

with open(mt_bench_output.path, "r") as f:
mt_bench_data = f.read()
mt_bench_data = ast.literal_eval(mt_bench_data)[0]

metrics.log_metric("mt_bench_best_model", mt_bench_data["model"])
metrics.log_metric("mt_bench_best_score", mt_bench_data["overall_score"])
metrics.log_metric("mt_bench_best_model_error_rate", mt_bench_data["error_rate"])

with open(mt_bench_branch_output.path, "r") as f:
mt_bench_branch_data = json.loads(f.read())

metrics.log_metric("mt_bench_branch_score", mt_bench_branch_data["overall_score"])
metrics.log_metric(
"mt_bench_branch_base_score", mt_bench_branch_data["base_overall_score"]
)

with open(mmlu_branch_output.path, "r") as f:
mmlu_branch_data = json.loads(f.read())

metrics.log_metric("mmlu_branch_score", mmlu_branch_data["model_score"])
metrics.log_metric("mmlu_branch_base_score", mmlu_branch_data["base_model_score"])
10 changes: 9 additions & 1 deletion pipeline.py
@@ -73,7 +73,7 @@ def ilab_pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]):
)

# Imports for evaluation
from eval.final import run_final_eval_op
from eval.final import generate_metrics_report_op, run_final_eval_op
from eval.mt_bench import run_mt_bench_op

@dsl.pipeline(
@@ -428,6 +428,14 @@ def pipeline(
model_pvc_delete_task = DeletePVC(pvc_name=model_pvc_task.output)
model_pvc_delete_task.after(final_eval_task)

generate_metrics_report_task = generate_metrics_report_op(
mmlu_branch_output=final_eval_task.outputs["mmlu_branch_output"],
mt_bench_branch_output=final_eval_task.outputs["mt_bench_branch_output"],
mt_bench_output=output_mt_bench_task.outputs["mt_bench_output"],
)
generate_metrics_report_task.after(output_mt_bench_task)
generate_metrics_report_task.set_caching_options(False)

return

return pipeline
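
The pipeline.yaml hunks below are the compiled form of this wiring. A rough sketch of regenerating the YAML with the standard KFP v2 compiler follows; the import path, the empty mock list, and the idea that the repository regenerates the file this way (rather than via its own make target or helper script) are assumptions.

from kfp import compiler

from pipeline import ilab_pipeline_wrapper  # assumed import path for this repo's pipeline.py

# Build the @dsl.pipeline-decorated function via the wrapper (no mocked stages),
# then compile it back into the checked-in pipeline.yaml.
pipeline_func = ilab_pipeline_wrapper(mock=[])
compiler.Compiler().compile(pipeline_func=pipeline_func, package_path="pipeline.yaml")
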
95 changes: 95 additions & 0 deletions pipeline.yaml
@@ -30,6 +30,8 @@
# train_num_warmup_steps_phase_2: int [Default: 1000.0]
# train_save_samples: int [Default: 250000.0]
# train_seed: int [Default: 42.0]
# Outputs:
# generate-metrics-report-op-metrics: system.Metrics
components:
comp-createpvc:
executorLabel: exec-createpvc
@@ -266,6 +268,28 @@ components:
description: Name of the PVC to delete. Supports passing a runtime-generated
name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``.
parameterType: STRING
comp-generate-metrics-report-op:
executorLabel: exec-generate-metrics-report-op
inputDefinitions:
artifacts:
mmlu_branch_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
mt_bench_branch_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
mt_bench_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
outputDefinitions:
artifacts:
metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
comp-git-clone-op:
executorLabel: exec-git-clone-op
inputDefinitions:
@@ -658,6 +682,42 @@ deploymentSpec:
exec-deletepvc-3:
container:
image: argostub/deletepvc
exec-generate-metrics-report-op:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- generate_metrics_report_op
command:
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef generate_metrics_report_op(\n mmlu_branch_output: Input[Artifact],\n\
\ mt_bench_branch_output: Input[Artifact],\n mt_bench_output: Input[Artifact],\n\
\ metrics: Output[Metrics],\n):\n import ast\n import json\n\n\
\ with open(mt_bench_output.path, \"r\") as f:\n mt_bench_data\
\ = f.read()\n mt_bench_data = ast.literal_eval(mt_bench_data)[0]\n\n\
\ metrics.log_metric(\"mt_bench_best_model\", mt_bench_data[\"model\"\
])\n metrics.log_metric(\"mt_bench_best_score\", mt_bench_data[\"overall_score\"\
])\n metrics.log_metric(\"mt_bench_best_model_error_rate\", mt_bench_data[\"\
error_rate\"])\n\n with open(mt_bench_branch_output.path, \"r\") as f:\n\
\ mt_bench_branch_data = json.loads(f.read())\n\n metrics.log_metric(\"\
mt_bench_branch_score\", mt_bench_branch_data[\"overall_score\"])\n metrics.log_metric(\n\
\ \"mt_bench_branch_base_score\", mt_bench_branch_data[\"base_overall_score\"\
]\n )\n\n with open(mmlu_branch_output.path, \"r\") as f:\n \
\ mmlu_branch_data = json.loads(f.read())\n\n metrics.log_metric(\"\
mmlu_branch_score\", mmlu_branch_data[\"model_score\"])\n metrics.log_metric(\"\
mmlu_branch_base_score\", mmlu_branch_data[\"base_model_score\"])\n\n"
image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111
exec-git-clone-op:
container:
args:
@@ -1650,6 +1710,12 @@ pipelineInfo:
name: instructlab
root:
dag:
outputs:
artifacts:
generate-metrics-report-op-metrics:
artifactSelectors:
- outputArtifactKey: metrics
producerSubtask: generate-metrics-report-op
tasks:
createpvc:
cachingOptions:
@@ -1779,6 +1845,29 @@ root:
producerTask: createpvc-2
taskInfo:
name: deletepvc-3
generate-metrics-report-op:
cachingOptions: {}
componentRef:
name: comp-generate-metrics-report-op
dependentTasks:
- pvc-to-mt-bench-op
- run-final-eval-op
inputs:
artifacts:
mmlu_branch_output:
taskOutputArtifact:
outputArtifactKey: mmlu_branch_output
producerTask: run-final-eval-op
mt_bench_branch_output:
taskOutputArtifact:
outputArtifactKey: mt_bench_branch_output
producerTask: run-final-eval-op
mt_bench_output:
taskOutputArtifact:
outputArtifactKey: mt_bench_output
producerTask: pvc-to-mt-bench-op
taskInfo:
name: generate-metrics-report-op
git-clone-op:
cachingOptions: {}
componentRef:
@@ -2232,6 +2321,12 @@ root:
description: Training parameter. Random seed for initializing training.
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
generate-metrics-report-op-metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
schemaVersion: 2.1.0
sdkVersion: kfp-2.9.0
---
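With the new root-level outputDefinitions entry, the values logged by generate_metrics_report_op surface as a system.Metrics artifact on each pipeline run. A hedged sketch of submitting the recompiled package with the KFP SDK client; the host URL, run name, and the assumption that all required pipeline parameters have defaults are illustrative, not part of this commit.

import kfp

# Hypothetical endpoint; replace with the actual KFP host.
client = kfp.Client(host="https://kfp.example.com")

result = client.create_run_from_pipeline_package(
    "pipeline.yaml",
    arguments={},  # assumes required parameters are supplied here or defaulted
    run_name="instructlab-with-metrics-report",
)
print(result.run_id)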
