opendatahub-io · cooktheryan · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024
diff --git a/importer-pipeline.yaml b/importer-pipeline.yaml
@@ -38,7 +38,7 @@ deploymentSpec:
           value: /tmp
         - name: XDG_DATA_HOME
           value: /tmp
-        image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
+        image: quay.io/redhat-et/ilab:1.3.1
 pipelineInfo:
   description: Helper pipeline to the InstructLab pipeline which allows users to seed/import
     a new base model

diff --git a/pipeline.py b/pipeline.py
@@ -10,6 +10,7 @@
     DeletePVC,
     mount_pvc,
     set_image_pull_policy,
+    set_image_pull_secrets,
     use_config_map_as_env,
     use_secret_as_env,
     use_secret_as_volume,
@@ -22,6 +23,7 @@
 MOCKED_STAGES = ["sdg", "train", "eval"]
 PIPELINE_FILE_NAME = "pipeline.yaml"
 IMPORTER_PIPELINE_FILE_NAME = "importer-pipeline.yaml"
+IMAGE_PULL_SECRET = "redhat-et-ilab-botty-pull-secret"
 STANDALONE_TEMPLATE_FILE_NAME = "standalone.tpl"
 GENERATED_STANDALONE_FILE_NAME = "standalone.py"
 DEFAULT_REPO_URL = "https://github.com/instructlab/taxonomy.git"
@@ -214,6 +216,8 @@ def pipeline(
         # uncomment if updating image with same tag
         # set_image_pull_policy(sdg_task, "Always")
 
+        set_image_pull_secrets(sdg_task, [IMAGE_PULL_SECRET])
+
         # Training stage
         model_source_s3_task = dsl.importer(
             artifact_uri=sdg_base_model, artifact_class=dsl.Model
@@ -248,6 +252,8 @@ def pipeline(
         data_processing_task.set_caching_options(False)
         data_processing_task.set_env_variable("XDG_CACHE_HOME", "/tmp")
 
+        set_image_pull_secrets(data_processing_task, [IMAGE_PULL_SECRET])
+
         # Upload "skills_processed_data" and "knowledge_processed_data" artifacts to S3 without blocking the rest of the workflow
         skills_processed_data_to_artifact_task = skills_processed_data_to_artifact_op()
         skills_processed_data_to_artifact_task.after(data_processing_task)
@@ -347,6 +353,7 @@ def pipeline(
             JUDGE_CONFIG_MAP,
             dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"),
         )
+        set_image_pull_secrets(run_mt_bench_task, [IMAGE_PULL_SECRET])
         use_secret_as_env(run_mt_bench_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"})
 
         # uncomment if updating image with same tag
@@ -385,6 +392,7 @@ def pipeline(
 
         final_eval_task.set_env_variable("HOME", "/tmp")
         final_eval_task.set_env_variable("HF_HOME", "/tmp")
+        set_image_pull_secrets(final_eval_task, [IMAGE_PULL_SECRET])
 
         # uncomment if updating image with same tag
         # set_image_pull_policy(final_eval_task, "Always")

diff --git a/pipeline.yaml b/pipeline.yaml
@@ -648,7 +648,7 @@ deploymentSpec:
         env:
         - name: XDG_CACHE_HOME
           value: /tmp
-        image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
+        image: quay.io/redhat-et/ilab:1.3.1
     exec-deletepvc:
       container:
         image: argostub/deletepvc
@@ -747,7 +747,7 @@ deploymentSpec:
           \       path_to_data = \"/input_data/knowledge/data.jsonl\"\n    elif phase_num\
           \ == 2:\n        path_to_model = list_phase1_final_model()\n        path_to_data\
           \ = \"/input_data/skills/data.jsonl\"\n    else:\n        raise RuntimeError(f\"\
-          Unsupported value of {phase_num=}\")\n\n    image = \"registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1\"\
+          Unsupported value of {phase_num=}\")\n\n    image = \"quay.io/redhat-et/ilab:1.3.1\"\
           \n\n    manifest = inspect.cleandoc(\n        f\"\"\"\n        apiVersion:\
           \ kubeflow.org/v1\n        kind: PyTorchJob\n        metadata:\n       \
           \   name: {name}\n        spec:\n          nprocPerNode: \\\"{nproc_per_node}\\\
@@ -951,7 +951,7 @@ deploymentSpec:
           \       path_to_data = \"/input_data/knowledge/data.jsonl\"\n    elif phase_num\
           \ == 2:\n        path_to_model = list_phase1_final_model()\n        path_to_data\
           \ = \"/input_data/skills/data.jsonl\"\n    else:\n        raise RuntimeError(f\"\
-          Unsupported value of {phase_num=}\")\n\n    image = \"registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1\"\
+          Unsupported value of {phase_num=}\")\n\n    image = \"quay.io/redhat-et/ilab:1.3.1\"\
           \n\n    manifest = inspect.cleandoc(\n        f\"\"\"\n        apiVersion:\
           \ kubeflow.org/v1\n        kind: PyTorchJob\n        metadata:\n       \
           \   name: {name}\n        spec:\n          nprocPerNode: \\\"{nproc_per_node}\\\
@@ -1373,7 +1373,7 @@ deploymentSpec:
           value: /tmp
         - name: HF_HOME
           value: /tmp
-        image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
+        image: quay.io/redhat-et/ilab:1.3.1
         resources:
           accelerator:
             count: '1'
@@ -1509,7 +1509,7 @@ deploymentSpec:
           value: /tmp
         - name: HF_HOME
           value: /tmp
-        image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
+        image: quay.io/redhat-et/ilab:1.3.1
         resources:
           accelerator:
             count: '1'
@@ -1619,7 +1619,7 @@ deploymentSpec:
           value: /tmp
         - name: HF_HOME
           value: /tmp
-        image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
+        image: quay.io/redhat-et/ilab:1.3.1
     exec-sdg-to-artifact-op:
       container:
         args:
@@ -2240,6 +2240,8 @@ platforms:
     deploymentSpec:
       executors:
         exec-data-processing-op:
+          imagePullSecret:
+          - secretName: redhat-et-ilab-botty-pull-secret
           pvcMount:
           - mountPath: /model
             taskOutputParameter:
@@ -2293,6 +2295,8 @@ platforms:
               envVar: JUDGE_ENDPOINT
             - configMapKey: model
               envVar: JUDGE_NAME
+          imagePullSecret:
+          - secretName: redhat-et-ilab-botty-pull-secret
           pvcMount:
           - mountPath: /output
             taskOutputParameter:
@@ -2319,6 +2323,8 @@ platforms:
               envVar: JUDGE_ENDPOINT
             - configMapKey: model
               envVar: JUDGE_NAME
+          imagePullSecret:
+          - secretName: redhat-et-ilab-botty-pull-secret
           pvcMount:
           - mountPath: /output
             taskOutputParameter:
@@ -2337,6 +2343,8 @@ platforms:
               envVar: endpoint
             - configMapKey: model
               envVar: model
+          imagePullSecret:
+          - secretName: redhat-et-ilab-botty-pull-secret
           pvcMount:
           - mountPath: /data
             taskOutputParameter:

diff --git a/rhoai-ilab-image/Containerfile b/rhoai-ilab-image/Containerfile
@@ -1,4 +1,3 @@
-FROM registry.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3
+FROM registry.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
 
-RUN pip install kfp==2.9.0
-RUN pip install kubeflow-training
+ADD connection.py /opt/app-root/lib64/python3.11/site-packages/multiprocess/connection.py