From 22bf27e242f0adf031678aa4e821a34de068432f Mon Sep 17 00:00:00 2001 From: Michael Clifford Date: Wed, 8 Jan 2025 21:13:24 -0500 Subject: [PATCH] add components to output artifacts from final eval Signed-off-by: Michael Clifford --- eval/__init__.py | 2 +- eval/final.py | 2 +- pipeline.py | 41 ++++++++++++++++++---- pipeline.yaml | 85 +++++++++++++++++++++++++++++++++++++++++++-- utils/__init__.py | 4 +++ utils/components.py | 20 +++++++++++ 6 files changed, 143 insertions(+), 11 deletions(-) diff --git a/eval/__init__.py b/eval/__init__.py index 8ffc0d3..08b58a7 100644 --- a/eval/__init__.py +++ b/eval/__init__.py @@ -1,4 +1,4 @@ -from .final import run_final_eval_op, generate_metrics_report_op +from .final import generate_metrics_report_op, run_final_eval_op from .mt_bench import run_mt_bench_op __all__ = ["run_final_eval_op", "run_mt_bench_op", "generate_metrics_report_op"] diff --git a/eval/final.py b/eval/final.py index cc814d4..7d0f473 100644 --- a/eval/final.py +++ b/eval/final.py @@ -1,7 +1,7 @@ # type: ignore # pylint: disable=import-outside-toplevel,import-error -from kfp.dsl import Artifact, Input, Metrics, Output, component +from kfp.dsl import Metrics, Output, component from utils.consts import PYTHON_IMAGE, RHELAI_IMAGE diff --git a/pipeline.py b/pipeline.py index 2b53d09..c55a2e0 100644 --- a/pipeline.py +++ b/pipeline.py @@ -17,7 +17,7 @@ use_secret_as_volume, ) -from eval import run_final_eval_op, run_mt_bench_op, generate_metrics_report_op +from eval import generate_metrics_report_op, run_final_eval_op, run_mt_bench_op from sdg import ( git_clone_op, sdg_op, @@ -33,7 +33,9 @@ from utils import ( ilab_importer_op, model_to_pvc_op, + pvc_to_mmlu_branch_op, pvc_to_model_op, + pvc_to_mt_bench_branch_op, pvc_to_mt_bench_op, ) from utils.consts import RHELAI_IMAGE @@ -58,13 +60,11 @@ JUDGE_CA_CERT_PATH = "/tmp/cert" - @dsl.pipeline( display_name="InstructLab", name="instructlab", description="InstructLab pipeline", ) - def ilab_pipeline( # SDG phase sdg_repo_url: str = "https://github.com/instructlab/taxonomy.git", @@ -426,6 +426,30 @@ def ilab_pipeline( mount_path="/output", ) + output_mt_bench_branch_task = pvc_to_mt_bench_branch_op( + pvc_path="/output/mt_bench_branch/mt_bench_branch_data.json", + ) + output_mt_bench_branch_task.after(final_eval_task) + output_mt_bench_branch_task.set_caching_options(False) + + mount_pvc( + task=output_mt_bench_branch_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + + output_mmlu_branch_task = pvc_to_mmlu_branch_op( + pvc_path="/output/mmlu_branch/mmlu_branch_data.json", + ) + output_mmlu_branch_task.after(final_eval_task) + output_mmlu_branch_task.set_caching_options(False) + + mount_pvc( + task=output_mmlu_branch_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + sdg_pvc_delete_task = DeletePVC(pvc_name=sdg_input_pvc_task.output) sdg_pvc_delete_task.after(final_eval_task) @@ -433,20 +457,23 @@ def ilab_pipeline( model_pvc_delete_task.after(final_eval_task) generate_metrics_report_task = generate_metrics_report_op() - generate_metrics_report_task.after(output_mt_bench_task, final_eval_task) + generate_metrics_report_task.after(final_eval_task) generate_metrics_report_task.set_caching_options(False) mount_pvc( task=generate_metrics_report_task, pvc_name=output_pvc_task.output, mount_path="/output", ) - + output_pvc_delete_task = DeletePVC(pvc_name=output_pvc_task.output) output_pvc_delete_task.after( output_model_task, - output_mt_bench_task, final_eval_task + output_mt_bench_task, + output_mmlu_branch_task, + output_mt_bench_branch_task, + generate_metrics_report_task, ) - + return diff --git a/pipeline.yaml b/pipeline.yaml index 3512589..49f6486 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -329,6 +329,18 @@ components: defaultValue: /model isOptional: true parameterType: STRING + comp-pvc-to-mmlu-branch-op: + executorLabel: exec-pvc-to-mmlu-branch-op + inputDefinitions: + parameters: + pvc_path: + parameterType: STRING + outputDefinitions: + artifacts: + mmlu_branch_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 comp-pvc-to-model-op: executorLabel: exec-pvc-to-model-op inputDefinitions: @@ -341,6 +353,18 @@ components: artifactType: schemaTitle: system.Model schemaVersion: 0.0.1 + comp-pvc-to-mt-bench-branch-op: + executorLabel: exec-pvc-to-mt-bench-branch-op + inputDefinitions: + parameters: + pvc_path: + parameterType: STRING + outputDefinitions: + artifacts: + mt_bench_branch_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 comp-pvc-to-mt-bench-op: executorLabel: exec-pvc-to-mt-bench-op inputDefinitions: @@ -765,6 +789,14 @@ deploymentSpec: - /bin/sh - -c image: registry.redhat.io/ubi9/toolbox@sha256:da31dee8904a535d12689346e65e5b00d11a6179abf1fa69b548dbd755fa2770 + exec-pvc-to-mmlu-branch-op: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['mmlu_branch_output'].path}} + command: + - /bin/sh + - -c + image: registry.redhat.io/ubi9/toolbox@sha256:da31dee8904a535d12689346e65e5b00d11a6179abf1fa69b548dbd755fa2770 exec-pvc-to-model-op: container: args: @@ -773,6 +805,14 @@ deploymentSpec: - /bin/sh - -c image: registry.redhat.io/ubi9/toolbox@sha256:da31dee8904a535d12689346e65e5b00d11a6179abf1fa69b548dbd755fa2770 + exec-pvc-to-mt-bench-branch-op: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['mt_bench_branch_output'].path}} + command: + - /bin/sh + - -c + image: registry.redhat.io/ubi9/toolbox@sha256:da31dee8904a535d12689346e65e5b00d11a6179abf1fa69b548dbd755fa2770 exec-pvc-to-mt-bench-op: container: args: @@ -1700,9 +1740,11 @@ root: name: comp-deletepvc-3 dependentTasks: - createpvc-3 + - generate-metrics-report-op + - pvc-to-mmlu-branch-op - pvc-to-model-op + - pvc-to-mt-bench-branch-op - pvc-to-mt-bench-op - - run-final-eval-op inputs: parameters: pvc_name: @@ -1717,7 +1759,6 @@ root: name: comp-generate-metrics-report-op dependentTasks: - createpvc-3 - - pvc-to-mt-bench-op - run-final-eval-op taskInfo: name: generate-metrics-report-op @@ -1772,6 +1813,20 @@ root: producerTask: importer taskInfo: name: model-to-pvc-op + pvc-to-mmlu-branch-op: + cachingOptions: {} + componentRef: + name: comp-pvc-to-mmlu-branch-op + dependentTasks: + - createpvc-3 + - run-final-eval-op + inputs: + parameters: + pvc_path: + runtimeValue: + constant: /output/mmlu_branch/mmlu_branch_data.json + taskInfo: + name: pvc-to-mmlu-branch-op pvc-to-model-op: cachingOptions: {} componentRef: @@ -1786,6 +1841,20 @@ root: constant: /output/phase_2/model/hf_format/candidate_model taskInfo: name: pvc-to-model-op + pvc-to-mt-bench-branch-op: + cachingOptions: {} + componentRef: + name: comp-pvc-to-mt-bench-branch-op + dependentTasks: + - createpvc-3 + - run-final-eval-op + inputs: + parameters: + pvc_path: + runtimeValue: + constant: /output/mt_bench_branch/mt_bench_branch_data.json + taskInfo: + name: pvc-to-mt-bench-branch-op pvc-to-mt-bench-op: cachingOptions: {} componentRef: @@ -2226,12 +2295,24 @@ platforms: taskOutputParameter: outputParameterKey: name producerTask: createpvc-2 + exec-pvc-to-mmlu-branch-op: + pvcMount: + - mountPath: /output + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-3 exec-pvc-to-model-op: pvcMount: - mountPath: /output taskOutputParameter: outputParameterKey: name producerTask: createpvc-3 + exec-pvc-to-mt-bench-branch-op: + pvcMount: + - mountPath: /output + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-3 exec-pvc-to-mt-bench-op: pvcMount: - mountPath: /output diff --git a/utils/__init__.py b/utils/__init__.py index 378ba5d..e062ba1 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1,13 +1,17 @@ from .components import ( ilab_importer_op, model_to_pvc_op, + pvc_to_mmlu_branch_op, pvc_to_model_op, + pvc_to_mt_bench_branch_op, pvc_to_mt_bench_op, ) __all__ = [ "model_to_pvc_op", "pvc_to_mt_bench_op", + "pvc_to_mt_bench_branch_op", + "pvc_to_mmlu_branch_op", "pvc_to_model_op", "ilab_importer_op", ] diff --git a/utils/components.py b/utils/components.py index 31bbd80..c0a9dd7 100644 --- a/utils/components.py +++ b/utils/components.py @@ -14,6 +14,26 @@ def pvc_to_mt_bench_op(mt_bench_output: dsl.Output[dsl.Artifact], pvc_path: str) ) +@dsl.container_component +def pvc_to_mt_bench_branch_op( + mt_bench_branch_output: dsl.Output[dsl.Artifact], pvc_path: str +): + return dsl.ContainerSpec( + TOOLBOX_IMAGE, + ["/bin/sh", "-c"], + [f"cp -r {pvc_path} {mt_bench_branch_output.path}"], + ) + + +@dsl.container_component +def pvc_to_mmlu_branch_op(mmlu_branch_output: dsl.Output[dsl.Artifact], pvc_path: str): + return dsl.ContainerSpec( + TOOLBOX_IMAGE, + ["/bin/sh", "-c"], + [f"cp -r {pvc_path} {mmlu_branch_output.path}"], + ) + + @dsl.container_component def pvc_to_model_op(model: dsl.Output[dsl.Model], pvc_path: str): return dsl.ContainerSpec(