opendatahub-io
diff --git a/‎pipeline.py
+1-1 b/‎pipeline.py
+1-1
diff --git a/‎standalone/README.md
+144-2 b/‎standalone/README.md
+144-2
@@ -447,7 +447,7 @@ def gen_standalone():
         "exec-git-clone-op": {},
         "exec-huggingface-importer-op": 'huggingface_importer_op(repo_name="{REPO_GRANITE_7B_IMAGE}", model="{DATA_PVC_MODEL_PATH}")',
         "exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})',
-        "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_OUTPUT_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy="{TAXONOMY_PATH}", tasks="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size={BATCH_SIZE})',
+        "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy="{TAXONOMY_PATH}", tasks="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size={BATCH_SIZE})',
     }
 
     details = {}
 
@@ -26,6 +26,144 @@ The `standalone.py` script is designed to run within a Kubernetes environment. T
 > [!TIP]
 > Check the `show` command to display an example of a Kubernetes Job that runs the script. Run `./standalone.py show`.
 
+### RBAC Requirements when running in a Kubernetes Job
+
+The script creates, reads, and watches several Kubernetes resources. The
+[ServiceAccount](https://kubernetes.io/docs/concepts/security/service-accounts/) that runs the
+script must therefore be granted the following RBAC permissions:
+
+```yaml
+# logs
+- verbs:
+    - get
+    - list
+  apiGroups:
+    - ""
+  resources:
+    - pods/log
+# Jobs
+- verbs:
+    - create
+    - get
+    - list
+    - watch
+  apiGroups:
+    - batch
+  resources:
+    - jobs
+# Pods
+- verbs:
+    - create
+    - get
+    - list
+    - watch
+  apiGroups:
+    - ""
+  resources:
+    - pods
+# Secrets
+- verbs:
+    - create
+    - get
+  apiGroups:
+    - ""
+  resources:
+    - secrets
+# ConfigMaps
+- verbs:
+    - create
+    - get
+  apiGroups:
+    - ""
+  resources:
+    - configmaps
+# PVCs
+- verbs:
+    - create
+  apiGroups:
+    - ""
+  resources:
+    - persistentvolumeclaims
+# PyTorchJob
+- verbs:
+    - create
+    - get
+    - list
+    - watch
+  apiGroups:
+    - kubeflow.org
+  resources:
+    - pytorchjobs
+# Watchers
+- verbs:
+    - get
+    - list
+    - watch
+  apiGroups:
+    - ""
+  resources:
+    - events
+```
+
+### Run in a Kubernetes Job
+
+To run the script in a Kubernetes Job, create a Job resource that executes it. The `show`
+subcommand prints an example Job manifest; the command below is followed by its output:
+
+```bash
+./standalone/standalone.py show \
+  --image quay.io/opendatahub/workbench-images:jupyter-datascience-ubi9-python-3.11-20241004-609ffb8 \
+  --script-configmap standalone \
+  --script-name script \
+  --namespace leseb \
+  --args "--storage-class=nfs-csi" \
+  --args "--namespace=leseb" \
+  --args "--sdg-object-store-secret=sdg-object-store-credentials" \
+  --args "--judge-serving-model-secret=judge-serving-details"
+
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: distributed-ilab
+  namespace: leseb
+spec:
+  template:
+    spec:
+      containers:
+      - args:
+        - --storage-class=nfs-csi
+        - --namespace=leseb
+        - --sdg-object-store-secret=sdg-object-store-credentials
+        - --judge-serving-model-secret=judge-serving-details
+        command:
+        - python3
+        - /config/script
+        - run
+        image: quay.io/opendatahub/workbench-images:jupyter-datascience-ubi9-python-3.11-20241004-609ffb8
+        name: distributed-ilab
+        volumeMounts:
+        - mountPath: /config
+          name: script-config
+      restartPolicy: Never
+      serviceAccountName: default
+      volumes:
+      - configMap:
+          name: standalone
+        name: script-config
+```
+
+Optional arguments can be added to the `args` list to customize the script's behavior. They are
+the same options you would pass to the script when running it from the command line.
+
+Available options for the `show` subcommand:
+
+* `--namespace`: Kubernetes namespace to run the job in
+* `--name`: Name of the job
+* `--image`: The image to use for the job
+* `--script-configmap`: The name of the ConfigMap that holds the script
+* `--script-name`: The name of the script in the ConfigMap
+* `--args`: Additional arguments to pass to the script - can be passed multiple times
+
 ## Features
 
 * Run any part of the InstructLab workflow in a standalone environment independently or a full end-to-end workflow:
@@ -36,7 +174,9 @@ The `standalone.py` script is designed to run within a Kubernetes environment. T
   * Evaluate model by running MT_Bench with `evaluation` subcommand along with `--eval-type mt-bench` option.
   * Final model evaluation with `evaluation` subcommand along with `--eval-type final` option.
       * Final evaluation runs both MT_Bench_Branch and MMLU_Branch
-  * Push the final model back to the object store -  same location as the SDG data with `upload-trained-model` subcommand.
+  * Push the final model back to the object store (the same location as the SDG data) with the
+    `upload-trained-model` subcommand.
+* Dry-run mode that prints the generated Kubernetes resources without executing them, enabled
+  with the `--dry-run` option.
 
 > [!NOTE]
 > Read about InstructLab model evaluation in the [instructlab/eval repository](https://github.com/instructlab/eval/blob/main/README.md).
@@ -124,7 +264,9 @@ evaluation
 * `--training-1-epoch-num`: The number of epochs to train the model for phase 1. **Optional** - Default: 7.
 * `--training-2-epoch-num`: The number of epochs to train the model for phase 2. **Optional** -
   Default: 10.
-* `--eval-type`: The evaluation type to use. **Optional** - Default: `mt-bench`. Available options: `mt-bench`, `final`.
+* `--eval-type`: The evaluation type to use. **Optional** - Default: `mt-bench`. Available options:
+  `mt-bench`, `final`.
+* `--dry-run`: Print the generated Kubernetes resources without executing them. **Optional** - Default: false.
 
 
 ## Example Workflow with Synthetic Data Generation (SDG)
Original file line number	Diff line number	Diff line change
`@@ -447,7 +447,7 @@ def gen_standalone():`
`447`	`447`	`"exec-git-clone-op": {},`
`448`	`448`	`"exec-huggingface-importer-op": 'huggingface_importer_op(repo_name="{REPO_GRANITE_7B_IMAGE}", model="{DATA_PVC_MODEL_PATH}")',`
`449`	`449`	`"exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",mt_bench_output="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})',`
`450`		- "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_OUTPUT_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy="{TAXONOMY_PATH}", tasks="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size={BATCH_SIZE})',
	`450`	+ "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy="{TAXONOMY_PATH}", tasks="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size={BATCH_SIZE})',
`451`	`451`	`}`
`452`	`452`
`453`	`453`	`details = {}`