From fce13b1b936f0a4c6dbb4be8ac456efd90c078f3 Mon Sep 17 00:00:00 2001 From: Gorkem Ercan Date: Fri, 28 Nov 2025 13:10:24 -0500 Subject: [PATCH 1/5] Add Kubeflow Pipeline components for ModelKits Introduces push-modelkit and unpack-modelkit components to enable integration of KitOps with Kubeflow Pipelines. Signed-off-by: Gorkem Ercan --- .../kubeflow-components/Dockerfile | 32 ++ .../dockerfiles/kubeflow-components/README.md | 316 ++++++++++++++++ .../components/push-modelkit/component.yaml | 51 +++ .../components/unpack-modelkit/component.yaml | 20 ++ .../kubeflow-components/examples/README.md | 27 ++ .../examples/house-prices-pipeline.py | 245 +++++++++++++ .../examples/train-house-prices.yaml | 123 +++++++ .../kubeflow-components/scripts/lib/common.sh | 112 ++++++ .../scripts/push-modelkit.sh | 232 ++++++++++++ .../scripts/unpack-modelkit.sh | 53 +++ .../tests/push-modelkit.bats | 337 ++++++++++++++++++ .../tests/unpack-modelkit.bats | 188 ++++++++++ 12 files changed, 1736 insertions(+) create mode 100644 build/dockerfiles/kubeflow-components/Dockerfile create mode 100644 build/dockerfiles/kubeflow-components/README.md create mode 100644 build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml create mode 100644 build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml create mode 100644 build/dockerfiles/kubeflow-components/examples/README.md create mode 100644 build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py create mode 100644 build/dockerfiles/kubeflow-components/examples/train-house-prices.yaml create mode 100644 build/dockerfiles/kubeflow-components/scripts/lib/common.sh create mode 100755 build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh create mode 100755 build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh create mode 100755 build/dockerfiles/kubeflow-components/tests/push-modelkit.bats create mode 100755 build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats diff --git a/build/dockerfiles/kubeflow-components/Dockerfile b/build/dockerfiles/kubeflow-components/Dockerfile new file mode 100644 index 00000000..2a518b0b --- /dev/null +++ b/build/dockerfiles/kubeflow-components/Dockerfile @@ -0,0 +1,32 @@ +# Multi-platform digest for Cosign v2.4.0 +ARG COSIGN_DIGEST=sha256:9d50ceb15f023eda8f58032849eedc0216236d2e2f4cfe1cdf97c00ae7798cfe +ARG KIT_BASE_IMAGE=ghcr.io/kitops-ml/kitops:next + +FROM gcr.io/projectsigstore/cosign@$COSIGN_DIGEST AS cosign-install +FROM $KIT_BASE_IMAGE + +# Install additional tools needed for scripts +USER 0 +RUN apk add --no-cache \ + bash \ + jq +USER 1001 + +# Copy cosign from multi-platform build +COPY --from=cosign-install /ko-app/cosign /usr/local/bin/cosign + +# Copy scripts (needs root for chmod) +USER 0 +COPY scripts/ /scripts/ +RUN chmod +x /scripts/*.sh +USER 1001 + +# Set working directory +WORKDIR /workspace + +# Default entrypoint +ENTRYPOINT ["/bin/bash"] + +LABEL org.opencontainers.image.description="KitOps Kubeflow Pipeline Components" +LABEL org.opencontainers.image.source="https://github.com/kitops-ml/kitops" +LABEL org.opencontainers.image.licenses="Apache-2.0" diff --git a/build/dockerfiles/kubeflow-components/README.md b/build/dockerfiles/kubeflow-components/README.md new file mode 100644 index 00000000..546151f5 --- /dev/null +++ b/build/dockerfiles/kubeflow-components/README.md @@ -0,0 +1,316 @@ +# Kubeflow Pipeline ModelKit Components + +Kubeflow Pipeline components for packaging and deploying ML artifacts as KitOps ModelKits. 
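
The components run from a single container image that bundles the `kit` CLI, `cosign`, and `jq` together with the wrapper scripts in `scripts/`. As a quick smoke test of the image (a sketch, assuming the `ghcr.io/kitops-ml/kitops-kubeflow` image published by the release workflows and that `kit` is on the image's `PATH`; the entrypoint is `/bin/bash`):

```bash
# Run the bundled CLIs once and exit; adjust the tag to the release you use
docker run --rm ghcr.io/kitops-ml/kitops-kubeflow:latest -c 'kit version && cosign version'
```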
+ +## Components + +### push-modelkit + +Packages ML artifacts in a directory as a ModelKit and pushes it to an OCI registry. + +If a `Kitfile` exists in `modelkit_dir`, it is used as-is. Otherwise, one is auto-generated via `kit init`. + +**Required inputs** + +- `registry` – Container registry host (e.g., `registry.io`) +- `repository` – Repository path (e.g., `myorg/mymodel`) +- `tag` – ModelKit tag (default: `latest`) +- `modelkit_dir` – Directory with model files (with or without `Kitfile`) + +**Optional metadata (for Kitfile)** + +- `modelkit_name` – ModelKit package name +- `modelkit_desc` – ModelKit description +- `modelkit_author` – ModelKit author + +**Optional attestation metadata** + +- `dataset_uri` – Dataset URI +- `code_repo` – Code repository URL +- `code_commit` – Code commit hash + +**Outputs** + +- `uri` – Full ModelKit URI with digest (e.g., `registry.io/myorg/mymodel@sha256:abc…`) +- `digest` – ModelKit digest (e.g., `sha256:abc…`) + +### unpack-modelkit + +Pulls a ModelKit from a registry and extracts it. + +**Inputs** + +- `modelkit_uri` – ModelKit reference (e.g., `registry.io/repo:tag` or `registry.io/repo@sha256:…`) +- `extract_path` – Directory to extract contents (default: `/tmp/model`) + +**Outputs** + +- `model_path` – Directory where contents were extracted + +## Usage Examples + +Complete, runnable examples (including a full house-prices pipeline) are in the [`examples/`](examples/) directory. + +### Basic usage + +Training component that writes ML artifacts to a directory: + +```python +from kfp import dsl + +@dsl.component( + packages_to_install=['pandas', 'xgboost', 'scikit-learn'], + base_image='python:3.11-slim', +) +def train_model(modelkit_dir: dsl.Output[dsl.Artifact]): + """Train model and save to directory.""" + import os + import pickle + + model = train_your_model() + os.makedirs(modelkit_dir.path, exist_ok=True) + + with open(os.path.join(modelkit_dir.path, 'model.pkl'), 'wb') as f: + pickle.dump(model, f) + + save_dataset(os.path.join(modelkit_dir.path, 'predictions.csv')) + save_code(os.path.join(modelkit_dir.path, 'train.py')) + save_docs(os.path.join(modelkit_dir.path, 'README.md')) +``` + +Component to push the directory as a ModelKit: + +```python +from kfp import dsl, kubernetes + +@dsl.container_component +def push_modelkit( + registry: str, + repository: str, + tag: str, + input_modelkit_dir: dsl.Input[dsl.Artifact], + output_uri: dsl.Output[dsl.Artifact], + output_digest: dsl.Output[dsl.Artifact], + modelkit_name: str = '', + modelkit_desc: str = '', + modelkit_author: str = '', + dataset_uri: str = '', + code_repo: str = '', + code_commit: str = '', +): + return dsl.ContainerSpec( + image='ghcr.io/kitops-ml/kubeflow:latest', + command=['/bin/bash', '-c'], + args=[ + f'/scripts/push-modelkit.sh ' + f'"{registry}" "{repository}" "{tag}" ' + f'--modelkit-dir "{input_modelkit_dir.path}" ' + f'--name "{modelkit_name}" ' + f'--desc "{modelkit_desc}" ' + f'--author "{modelkit_author}" ' + f'--dataset-uri "{dataset_uri}" ' + f'--code-repo "{code_repo}" ' + f'--code-commit "{code_commit}" ' + f'&& cp /tmp/outputs/uri "{output_uri.path}" ' + f'&& cp /tmp/outputs/digest "{output_digest.path}"' + ], + ) +``` + +Simple end‑to‑end pipeline: + +```python +@dsl.pipeline( + name='simple-modelkit-pipeline', + description='Train and package as ModelKit', +) +def simple_pipeline( + registry: str = 'jozu.ml', + repository: str = 'team/model', + tag: str = 'latest', +): + train = train_model() + + push = push_modelkit( + registry=registry, + 
repository=repository, + tag=tag, + input_modelkit_dir=train.outputs['modelkit_dir'], + modelkit_name='My Model', + modelkit_desc='Description of my model', + modelkit_author='Data Science Team', + ) + + kubernetes.use_secret_as_volume( + push, + secret_name='docker-config', + mount_path='/etc/docker-config', + ) +``` + +### Using a custom Kitfile + +If you need full control, create a `Kitfile` alongside your artifacts: + +```python +@dsl.component(base_image='python:3.11-slim') +def train_with_kitfile(modelkit_dir: dsl.Output[dsl.Artifact]): + """Train and create custom Kitfile.""" + import os + + train_and_save_model(modelkit_dir.path) + + kitfile_content = """ +manifestVersion: 1.0 +package: + name: Custom Model + description: Model with custom configuration + authors: + - Data Science Team +model: + path: model.pkl +datasets: + - path: train.csv + - path: test.csv +code: + - path: train.py +docs: + - path: README.md +""" + with open(os.path.join(modelkit_dir.path, 'Kitfile'), 'w') as f: + f.write(kitfile_content) +``` + +When a `Kitfile` is present, the component uses it instead of generating one. + +### Pipeline with attestation + +```python +@dsl.pipeline( + name='production-pipeline', + description='Production pipeline with attestation', +) +def production_pipeline( + registry: str = 'jozu.ml', + repository: str = 'team/prod-model', + tag: str = 'v1.0.0', + dataset_uri: str = 's3://bucket/data.csv', + code_repo: str = 'github.com/org/repo', + code_commit: str = 'abc123', +): + train = train_model() + + push = push_modelkit( + registry=registry, + repository=repository, + tag=tag, + input_modelkit_dir=train.outputs['modelkit_dir'], + modelkit_name='Production Model', + modelkit_desc='Production model v1.0.0', + modelkit_author='ML Team', + dataset_uri=dataset_uri, + code_repo=code_repo, + code_commit=code_commit, + ) + + kubernetes.use_secret_as_volume( + push, + secret_name='docker-config', + mount_path='/etc/docker-config', + ) + kubernetes.use_secret_as_volume( + push, + secret_name='cosign-keys', + mount_path='/etc/cosign', + ) +``` + +## Secret Requirements + +### Registry credentials + +Create a Kubernetes secret with Docker registry credentials: + +```bash +kubectl create secret generic docker-config \ + --from-file=config.json="$HOME/.docker/config.json" \ + --namespace=kubeflow +``` + +Or: + +```bash +kubectl create secret docker-registry docker-config \ + --docker-server=jozu.ml \ + --docker-username=myuser \ + --docker-password=mypassword \ + --docker-email=user@example.com \ + --namespace=kubeflow +``` + +Mount in your pipeline (as shown above) using: + +```python +kubernetes.use_secret_as_volume( + push, + secret_name='docker-config', + mount_path='/etc/docker-config', +) +``` + +### Cosign keys (optional) + +For ModelKit attestation signing, create a secret with cosign keys: + +```bash +cosign generate-key-pair + +kubectl create secret generic cosign-keys \ + --from-file=cosign.key=cosign.key \ + --from-file=cosign.pub=cosign.pub \ + --namespace=kubeflow +``` + +Mount it as in the attestation pipeline example: + +```python +kubernetes.use_secret_as_volume( + push, + secret_name='cosign-keys', + mount_path='/etc/cosign', +) +``` + +If cosign keys are not available, the signing step logs a warning and continues. 
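
To check a signature later, the attestation can be verified against the public key. A minimal sketch, assuming Cosign v2 and a digest URI reported by `push-modelkit`; the `--type` matches the predicate the push script attaches, and `--insecure-ignore-tlog` is needed because signing runs with `--tlog-upload=false`:

```bash
cosign verify-attestation \
  --key cosign.pub \
  --type https://kitops.ml/attestation/v1 \
  --insecure-ignore-tlog=true \
  jozu.ml/team/prod-model@sha256:<digest>
```
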
## Troubleshooting

### Authentication errors

**Symptom:** `Failed to push ModelKit` or `401 Unauthorized`

**Check:**

```bash
kubectl get secret docker-config -n kubeflow
kubectl get secret docker-config -n kubeflow \
  -o jsonpath='{.data.config\.json}' | base64 -d
```

`config.json` should contain registry auth for your host:

```json
{
  "auths": {
    "jozu.ml": {
      "auth": "base64(username:password)"
    }
  }
}
```

### Directory not found

**Symptom:** `ModelKit directory not found`

Ensure your training component creates `modelkit_dir.path` and writes artifacts into it (see `train_model` example above).
diff --git a/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml b/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml
new file mode 100644
index 00000000..400f7f70
--- /dev/null
+++ b/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml
@@ -0,0 +1,51 @@
name: push-modelkit
description: Package and push ML artifacts as a ModelKit

inputs:
  - {name: registry, type: String, description: 'Container registry host (e.g., registry.io)'}
  - {name: repository, type: String, description: 'Repository path (e.g., myorg/mymodel)'}
  - {name: tag, type: String, default: 'latest', description: 'ModelKit tag'}

  - {name: modelkit_dir, type: String, description: 'Directory containing ML artifacts (with or without Kitfile)'}

  - {name: modelkit_name, type: String, optional: true, description: 'Name for the ModelKit'}
  - {name: modelkit_desc, type: String, optional: true, description: 'Description for the ModelKit'}
  - {name: modelkit_author, type: String, optional: true, description: 'Author for the ModelKit'}

  - {name: dataset_uri, type: String, optional: true, description: 'Dataset URI'}
  - {name: code_repo, type: String, optional: true, description: 'Code repository URL'}
  - {name: code_commit, type: String, optional: true, description: 'Code commit hash'}

outputs:
  - {name: uri, type: String, description: 'Full ModelKit URI with digest'}
  - {name: digest, type: String, description: 'ModelKit digest (sha256:...)'}

implementation:
  container:
    image: kubeflow:dev
    command:
      - /bin/bash
      - -c
      - |
        /scripts/push-modelkit.sh \
          "$0" "$1" "$2" \
          --modelkit-dir "$3" \
          ${4:+--name "$4"} \
          ${5:+--desc "$5"} \
          ${6:+--author "$6"} \
          ${7:+--dataset-uri "$7"} \
          ${8:+--code-repo "$8"} \
          ${9:+--code-commit "$9"}
      - {inputValue: registry}
      - {inputValue: repository}
      - {inputValue: tag}
      - {inputValue: modelkit_dir}
      - {inputValue: modelkit_name}
      - {inputValue: modelkit_desc}
      - {inputValue: modelkit_author}
      - {inputValue: dataset_uri}
      - {inputValue: code_repo}
      - {inputValue: code_commit}
    fileOutputs:
      uri: /tmp/outputs/uri
      digest: /tmp/outputs/digest
diff --git a/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml b/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml
new file mode 100644
index 00000000..d0db3593
--- /dev/null
+++ b/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml
@@ -0,0 +1,20 @@
name: unpack-modelkit
description: Unpack ModelKit artifacts from a registry

inputs:
  - {name: modelkit_uri, type: String, description: 'ModelKit reference (e.g., registry.io/repo:tag or registry.io/repo@sha256:...)'}
  - {name: extract_path, type: String, default: '/tmp/model', description: 'Directory to extract ModelKit artifacts'}

outputs:
  - {name: model_path, type: String, description: 'Directory where ModelKit artifacts were extracted'}

implementation:
  container:
    image: ghcr.io/kitops-ml/kubeflow:latest
    command:
      - /bin/bash
      - /scripts/unpack-modelkit.sh
      - {inputValue: modelkit_uri}
      - {inputValue: extract_path}
    fileOutputs:
      model_path: /tmp/outputs/model_path
diff --git a/build/dockerfiles/kubeflow-components/examples/README.md b/build/dockerfiles/kubeflow-components/examples/README.md
new file mode 100644
index 00000000..d6c81181
--- /dev/null
+++ b/build/dockerfiles/kubeflow-components/examples/README.md
@@ -0,0 +1,27 @@
# Kubeflow ModelKit Component Examples

This directory contains runnable Kubeflow Pipeline examples that use the `push-modelkit` and `unpack-modelkit` components.

## House Prices Pipeline (`house-prices-pipeline.py`)

An end-to-end pipeline that:

- Trains an XGBoost regression model
- Writes model artifacts into a directory (model, code, docs)
- Packages them as a ModelKit and pushes to an OCI registry
- Optionally adds attestation metadata and cosign signing

### How to Run

```bash
pip install kfp==2.14.3 kfp-kubernetes==2.14.3
python house-prices-pipeline.py
```

Upload the generated `house-prices-with-modelkit.yaml` to the Kubeflow UI (or use the KFP SDK) to execute the pipeline.

### After It Runs

Use the `kit` CLI to pull, inspect, and unpack the resulting ModelKit.

For full component reference, integration patterns, secret requirements, and troubleshooting, see the main Kubeflow components [README](../README.md) in the parent directory.
diff --git a/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py
new file mode 100644
index 00000000..d6913d34
--- /dev/null
+++ b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py
@@ -0,0 +1,245 @@
"""
Example Kubeflow Pipeline integrating push-modelkit component with house prices training.

This example demonstrates the directory-based approach where the training component
creates a directory with well-named files (model.pkl, predictions.csv, train.py, README.md)
and the push-modelkit component packages the entire directory as a ModelKit.

Uses pure KFP v2.14.3 components without v1 compatibility.
+""" + +from kfp import dsl, kubernetes +from typing import NamedTuple + + +@dsl.component( + packages_to_install=['pandas', 'xgboost', 'scikit-learn'], + base_image='python:3.11-slim' +) +def train_house_prices( + modelkit_dir: dsl.Output[dsl.Artifact] +): + """Train house prices model with synthetic data.""" + import pandas as pd + import xgboost as xgb + from sklearn.model_selection import train_test_split + from sklearn.datasets import make_regression + import pickle + import os + + # Generate synthetic data + X, y = make_regression(n_samples=1000, n_features=10, noise=10, random_state=42) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Convert to DataFrame + feature_names = [f'feature_{i}' for i in range(X.shape[1])] + X_train_df = pd.DataFrame(X_train, columns=feature_names) + X_test_df = pd.DataFrame(X_test, columns=feature_names) + + # Train model + model = xgb.XGBRegressor(n_estimators=100, max_depth=7, learning_rate=0.1, random_state=42) + model.fit(X_train_df, y_train) + + # Evaluate + train_score = model.score(X_train_df, y_train) + test_score = model.score(X_test_df, y_test) + print(f"Training R² score: {train_score:.4f}") + print(f"Test R² score: {test_score:.4f}") + + # Create directory for modelkit artifacts + os.makedirs(modelkit_dir.path, exist_ok=True) + + # Write files with proper names directly to the directory + model_file = os.path.join(modelkit_dir.path, 'model.pkl') + predictions_file = os.path.join(modelkit_dir.path, 'predictions.csv') + training_script_file = os.path.join(modelkit_dir.path, 'train.py') + readme_file = os.path.join(modelkit_dir.path, 'README.md') + + # Save model using pickle (avoids KFP UTF-8 issues with binary formats) + with open(model_file, 'wb') as f: + pickle.dump(model, f) + + # Generate predictions + predictions = model.predict(X_test_df) + pred_df = pd.DataFrame({'Id': range(len(predictions)), 'Prediction': predictions}) + pred_df.to_csv(predictions_file, index=False) + + # Save training script + with open(training_script_file, 'w') as f: + f.write("""# Training Script + +This model was trained using XGBoost on synthetic regression data. + +## Training Configuration +- Algorithm: XGBoost Gradient Boosting +- n_estimators: 100 +- max_depth: 7 +- learning_rate: 0.1 +- random_state: 42 + +## Data +- Training samples: 800 +- Test samples: 200 +- Features: 10 (synthetic) +""") + + # Generate README + with open(readme_file, 'w') as f: + f.write(f"""# House Prices Demo Model + +## Model Details +- **Framework**: XGBoost {xgb.__version__} +- **Algorithm**: Gradient Boosted Trees +- **Training R² Score**: {train_score:.4f} +- **Test R² Score**: {test_score:.4f} + +## Training Data +- Training samples: {len(X_train)} +- Test samples: {len(X_test)} +- Features: {X.shape[1]} (synthetic) + +## Usage +```python +import pickle +with open('model.pkl', 'rb') as f: + model = pickle.load(f) +predictions = model.predict(X_new) +``` + +--- +Packaged with KitOps +""") + + +@dsl.container_component +def push_modelkit( + registry: str, + repository: str, + tag: str, + output_uri: dsl.Output[dsl.Artifact], + output_digest: dsl.Output[dsl.Artifact], + input_modelkit_dir: dsl.Input[dsl.Artifact], + modelkit_name: str = '', + modelkit_desc: str = '', + modelkit_author: str = '', + dataset_uri: str = '', + code_repo: str = '', + code_commit: str = '' +): + """Package and push model as ModelKit with attestation. 
+ + Outputs: + output_uri: Tagged URI (e.g., jozu.ml/repo:tag) + output_digest: Digest URI (e.g., jozu.ml/repo@sha256:...) + """ + # Build command using safe argument passing + return dsl.ContainerSpec( + image='kubeflow:dev', + command=['/bin/bash', '-c'], + args=[ + ''' + export DOCKER_CONFIG=/home/user/.docker && \ + /scripts/push-modelkit.sh \ + "$0" "$1" "$2" \ + --modelkit-dir "$3" \ + ${4:+--name "$4"} \ + ${5:+--desc "$5"} \ + ${6:+--author "$6"} \ + ${7:+--dataset-uri "$7"} \ + ${8:+--code-repo "$8"} \ + ${9:+--code-commit "$9"} \ + && cp /tmp/outputs/uri "${10}" \ + && cp /tmp/outputs/digest "${11}" + ''', + registry, + repository, + tag, + input_modelkit_dir.path, + modelkit_name, + modelkit_desc, + modelkit_author, + dataset_uri, + code_repo, + code_commit, + output_uri.path, + output_digest.path + ] + ) + +@dsl.pipeline( + name='house-prices-with-modelkit', + description='Train house prices model and package as ModelKit' +) +def house_prices_pipeline( + registry: str = 'jozu.ml', + repository: str = 'demo/house-prices', + tag: str = 'latest', + dataset_source_uri: str = 'synthetic', + code_repo: str = 'github.com/kitops-ml/kitops', + code_commit: str = 'main' +): + """ + Complete pipeline that trains a house prices model and packages it as a ModelKit. + + Args: + registry: Container registry (e.g., jozu.ml) + repository: Repository path for ModelKit (e.g., demo/house-prices) + tag: ModelKit tag + dataset_source_uri: Source URI of dataset for attestation + code_repo: Code repository for attestation + code_commit: Git commit hash for attestation + """ + + # Train model with synthetic data + train = train_house_prices() + + # Package as ModelKit with directory of artifacts + push = push_modelkit( + registry=registry, + repository=repository, + tag=tag, + # Pass directory containing all artifacts + input_modelkit_dir=train.outputs['modelkit_dir'], + # Metadata + modelkit_name='House Prices Demo Model', + modelkit_desc='XGBoost model trained on synthetic data for KitOps demo', + modelkit_author='KitOps Team', + # Attestation metadata + dataset_uri=dataset_source_uri, + code_repo=code_repo, + code_commit=code_commit + ) + + # Mount docker-config secret for registry authentication + kubernetes.use_secret_as_volume( + push, + secret_name='docker-config', + mount_path='/home/user/.docker' + ) + + +if __name__ == '__main__': + import kfp + + # Check KFP version and use appropriate compiler + kfp_version = kfp.__version__ + print(f"Using KFP version: {kfp_version}") + + if kfp_version.startswith('2.'): + # KFP v2 - compile with v1 compatibility + from kfp import compiler + compiler.Compiler().compile( + pipeline_func=house_prices_pipeline, + package_path='house-prices-with-modelkit.yaml' + ) + else: + # KFP v1 + import kfp.compiler as compiler + compiler.Compiler().compile( + pipeline_func=house_prices_pipeline, + pipeline_name='house-prices-with-modelkit', + package_path='house-prices-with-modelkit.yaml' + ) + + print("Pipeline compiled successfully!") + print("Upload house-prices-with-modelkit.yaml to Kubeflow UI") diff --git a/build/dockerfiles/kubeflow-components/examples/train-house-prices.yaml b/build/dockerfiles/kubeflow-components/examples/train-house-prices.yaml new file mode 100644 index 00000000..0c478a42 --- /dev/null +++ b/build/dockerfiles/kubeflow-components/examples/train-house-prices.yaml @@ -0,0 +1,123 @@ +name: train-house-prices +description: Train XGBoost model on synthetic house prices data + +outputs: + - {name: model_path, type: String} + - {name: 
predictions_path, type: String} + - {name: training_script_path, type: String} + - {name: readme_path, type: String} + +implementation: + container: + image: python:3.11-slim + command: + - sh + - -c + - | + pip install pandas xgboost scikit-learn && python - <<'EOF' + import pandas as pd + import xgboost as xgb + from sklearn.model_selection import train_test_split + from sklearn.datasets import make_regression + import pickle + import os + + # Create output directory + os.makedirs('/outputs', exist_ok=True) + + # Generate synthetic data + X, y = make_regression(n_samples=1000, n_features=10, noise=10, random_state=42) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Convert to DataFrame + feature_names = [f'feature_{i}' for i in range(X.shape[1])] + X_train_df = pd.DataFrame(X_train, columns=feature_names) + X_test_df = pd.DataFrame(X_test, columns=feature_names) + + # Train model + model = xgb.XGBRegressor(n_estimators=100, max_depth=7, learning_rate=0.1, random_state=42) + model.fit(X_train_df, y_train) + + # Evaluate + train_score = model.score(X_train_df, y_train) + test_score = model.score(X_test_df, y_test) + print(f"Training R² score: {train_score:.4f}") + print(f"Test R² score: {test_score:.4f}") + + # Save model + model_path = '/outputs/model.pkl' + with open(model_path, 'wb') as f: + pickle.dump(model, f) + + # Generate predictions + predictions = model.predict(X_test_df) + predictions_path = '/outputs/predictions.csv' + pred_df = pd.DataFrame({'Id': range(len(predictions)), 'Prediction': predictions}) + pred_df.to_csv(predictions_path, index=False) + + # Save training script + training_script_path = '/outputs/train.py' + with open(training_script_path, 'w') as f: + f.write("""# Training Script + + This model was trained using XGBoost on synthetic regression data. 
+ + ## Training Configuration + - Algorithm: XGBoost Gradient Boosting + - n_estimators: 100 + - max_depth: 7 + - learning_rate: 0.1 + - random_state: 42 + + ## Data + - Training samples: 800 + - Test samples: 200 + - Features: 10 (synthetic) + """) + + # Generate README + readme_path = '/outputs/README.md' + with open(readme_path, 'w') as f: + f.write(f"""# House Prices Demo Model + + ## Model Details + - **Framework**: XGBoost {xgb.__version__} + - **Algorithm**: Gradient Boosted Trees + - **Training R² Score**: {train_score:.4f} + - **Test R² Score**: {test_score:.4f} + + ## Training Data + - Training samples: {len(X_train)} + - Test samples: {len(X_test)} + - Features: {X.shape[1]} (synthetic) + + ## Usage + ```python + import pickle + with open('model.pkl', 'rb') as f: + model = pickle.load(f) + predictions = model.predict(X_new) + ``` + + --- + Packaged with KitOps + """) + + # Create KFP outputs directory + os.makedirs('/tmp/kfp/outputs', exist_ok=True) + + # Write output paths for KFP + with open('/tmp/kfp/outputs/model_path', 'w') as f: + f.write(model_path) + with open('/tmp/kfp/outputs/predictions_path', 'w') as f: + f.write(predictions_path) + with open('/tmp/kfp/outputs/training_script_path', 'w') as f: + f.write(training_script_path) + with open('/tmp/kfp/outputs/readme_path', 'w') as f: + f.write(readme_path) + EOF + fileOutputs: + model_path: /tmp/kfp/outputs/model_path + predictions_path: /tmp/kfp/outputs/predictions_path + training_script_path: /tmp/kfp/outputs/training_script_path + readme_path: /tmp/kfp/outputs/readme_path diff --git a/build/dockerfiles/kubeflow-components/scripts/lib/common.sh b/build/dockerfiles/kubeflow-components/scripts/lib/common.sh new file mode 100644 index 00000000..2ce60d37 --- /dev/null +++ b/build/dockerfiles/kubeflow-components/scripts/lib/common.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +# Common library for workflow utilities + +# Environment variables with defaults +export LOG_LEVEL="${LOG_LEVEL:-INFO}" +export REQUEST_ID="${REQUEST_ID:-}" + +# Convert LOG_LEVEL to numeric value: DEBUG=0, INFO=1, WARN=2, ERROR=3 +case "$LOG_LEVEL" in + DEBUG) LOG_LEVEL_VALUE=0 ;; + INFO) LOG_LEVEL_VALUE=1 ;; + WARN) LOG_LEVEL_VALUE=2 ;; + ERROR) LOG_LEVEL_VALUE=3 ;; + *) LOG_LEVEL_VALUE=1 ;; # Default to INFO +esac +export LOG_LEVEL_VALUE + +# Logging functions +log_json() { + local level=$1 + local message=$2 + local extra="${3-}" + if [ -z "$extra" ]; then extra="{}"; fi + + local timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + + jq -nc \ + --arg timestamp "$timestamp" \ + --arg level "$level" \ + --arg request_id "${REQUEST_ID:-}" \ + --arg message "$message" \ + --argjson extra "$extra" \ + '{timestamp: $timestamp, level: $level, request_id: $request_id, message: $message, extra: $extra}' +} + +log_debug() { + local extra="${2-}" + if [ -z "$extra" ]; then extra="{}"; fi + [ "$LOG_LEVEL_VALUE" -le 0 ] && log_json "DEBUG" "$1" "$extra" + return 0 +} + +log_info() { + local extra="${2-}" + if [ -z "$extra" ]; then extra="{}"; fi + [ "$LOG_LEVEL_VALUE" -le 1 ] && log_json "INFO" "$1" "$extra" +} + +log_warn() { + local extra="${2-}" + if [ -z "$extra" ]; then extra="{}"; fi + [ "$LOG_LEVEL_VALUE" -le 2 ] && log_json "WARN" "$1" "$extra" +} + +log_error() { + local extra="${2-}" + if [ -z "$extra" ]; then extra="{}"; fi + [ "$LOG_LEVEL_VALUE" -le 3 ] && log_json "ERROR" "$1" "$extra" >&2 +} + +# Print error message and exit +die() { + local extra="${2-}" + if [ -z "$extra" ]; then extra="{}"; fi + log_error "$1" "$extra" + exit 1 +} + +# Retry logic +retry() { + 
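    # Usage: retry <max_attempts> <delay_seconds> <command> [args...]
    # Runs the command up to max_attempts times, sleeping delay_seconds between
    # failed attempts; returns 0 on the first success, 1 after the final failure.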
    local max_attempts=${1:-3}
    local delay=${2:-2}
    shift 2
    local attempt=1

    while [ $attempt -le $max_attempts ]; do
        log_debug "Attempting command (attempt $attempt/$max_attempts)"

        if "$@"; then
            return 0
        fi

        if [ $attempt -lt $max_attempts ]; then
            log_warn "Command failed, retrying in ${delay}s" "{\"attempt\":$attempt}"
            sleep $delay
        fi

        attempt=$((attempt + 1))
    done

    log_error "Command failed after $max_attempts attempts"
    return 1
}

# Check required environment variables
require_env() {
    for var in "$@"; do
        if [ -z "${!var:-}" ]; then
            die "Required environment variable not set: $var"
        fi
    done
}

# Check required commands
require_cmd() {
    for cmd in "$@"; do
        if ! command -v "$cmd" &> /dev/null; then
            die "Required command not found: $cmd"
        fi
    done
}
diff --git a/build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh b/build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh
new file mode 100755
index 00000000..f86c383b
--- /dev/null
+++ b/build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh
@@ -0,0 +1,232 @@
#!/bin/bash
set -euo pipefail

# Usage: /scripts/push-modelkit.sh <registry> <repository> <tag> --modelkit-dir <dir> [options]
#
# Arguments:
#   <registry>            Container registry host (e.g., jozu.ml)
#   <repository>          Repository path (e.g., myorg/mymodel)
#   <tag>                 ModelKit tag
#   --modelkit-dir <dir>  Directory with ML artifacts (with or without Kitfile)
#
# Options:
#   --name <name>           ModelKit name
#   --desc <desc>           ModelKit description
#   --author <author>       ModelKit author
#   --dataset-uri <uri>     Dataset URI
#   --code-repo <repo>      Code repository
#   --code-commit <commit>  Code commit
#
# Environment variables: `DOCKER_CONFIG` (path to .docker directory containing config.json)

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib/common.sh"

# Initialize variables
REGISTRY=""
REPOSITORY=""
TAG=""
MODELKIT_DIR=""
MODELKIT_NAME=""
MODELKIT_DESC=""
MODELKIT_AUTHOR=""
DATASET_URI=""
CODE_REPO=""
CODE_COMMIT=""

# Parse arguments
if [ $# -lt 3 ]; then
    die "Usage: $0 <registry> <repository> <tag> --modelkit-dir <dir> [options]"
fi

# First three args are positional
REGISTRY="$1"
REPOSITORY="$2"
TAG="$3"
shift 3

# Parse optional arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --modelkit-dir)
            MODELKIT_DIR="$2"
            shift 2
            ;;
        --name)
            MODELKIT_NAME="$2"
            shift 2
            ;;
        --desc)
            MODELKIT_DESC="$2"
            shift 2
            ;;
        --author)
            MODELKIT_AUTHOR="$2"
            shift 2
            ;;
        --dataset-uri)
            DATASET_URI="$2"
            shift 2
            ;;
        --code-repo)
            CODE_REPO="$2"
            shift 2
            ;;
        --code-commit)
            CODE_COMMIT="$2"
            shift 2
            ;;
        *)
            die "Unknown argument: $1"
            ;;
    esac
done

# Validate required arguments
if [ -z "$MODELKIT_DIR" ]; then
    die "Must specify --modelkit-dir"
fi

if [ ! -d "$MODELKIT_DIR" ]; then
    die "ModelKit directory not found: $MODELKIT_DIR"
fi

# Construct ModelKit URI
MODELKIT_URI="${REGISTRY}/${REPOSITORY}:${TAG}"

log_info "Starting ModelKit push" "{\"uri\":\"$MODELKIT_URI\"}"

require_cmd kit cosign jq
require_env DOCKER_CONFIG

# Disable kit update notifications
kit version --show-update-notifications=false >/dev/null 2>&1 || true

# Create output directory
mkdir -p /tmp/outputs

# Use the provided directory as working directory
WORK_DIR="$MODELKIT_DIR"

log_info "Using ModelKit directory" "{\"dir\":\"$MODELKIT_DIR\"}"

# Check if Kitfile exists, if not run kit init
if [ ! -f "$WORK_DIR/Kitfile" ] && [ ! -f "$WORK_DIR/kitfile" ] && [ ! -f "$WORK_DIR/.kitfile" ]; then
    log_info "No Kitfile found, running kit init"

    INIT_ARGS=()
    [ -n "$MODELKIT_NAME" ] && INIT_ARGS+=(--name "$MODELKIT_NAME")
    [ -n "$MODELKIT_DESC" ] && INIT_ARGS+=(--desc "$MODELKIT_DESC")
    [ -n "$MODELKIT_AUTHOR" ] && INIT_ARGS+=(--author "$MODELKIT_AUTHOR")

    kit init "$WORK_DIR" ${INIT_ARGS[@]+"${INIT_ARGS[@]}"} || die "Failed to initialize Kitfile"
else
    log_info "Found existing Kitfile"
fi

# Pack the ModelKit
log_info "Packing ModelKit artifacts"
kit pack "$WORK_DIR" -t "$MODELKIT_URI" || die "Failed to pack ModelKit"

# Push to registry with retry
log_info "Pushing to registry"
retry 3 2 kit push "$MODELKIT_URI" || die "Failed to push ModelKit"

# Extract digest from kit inspect
log_debug "Extracting digest"
MODELKIT_DIGEST=$(echo "$MODELKIT_URI" | grep -oE '@sha256:[a-f0-9]+' | sed 's/@sha256://' || echo "")

if [ -z "$MODELKIT_DIGEST" ]; then
    log_debug "No digest in URI, fetching from registry"

    set +e
    INSPECT_OUTPUT=$(kit inspect "$MODELKIT_URI" --remote 2>&1)
    INSPECT_EXIT_CODE=$?
    set -e

    log_debug "Kit inspect completed" "{\"exit_code\":$INSPECT_EXIT_CODE}"

    if [ $INSPECT_EXIT_CODE -eq 0 ]; then
        # Extract digest from JSON output, filtering out any log lines
        MODELKIT_DIGEST=$(echo "$INSPECT_OUTPUT" | grep -v '^{"timestamp"' | jq -r '.digest' 2>/dev/null | sed 's/sha256://' || echo "")
    fi

    if [ -z "$MODELKIT_DIGEST" ]; then
        die "Could not determine ModelKit digest" "{\"reference\":\"$MODELKIT_URI\",\"exit_code\":$INSPECT_EXIT_CODE}"
    fi
fi

log_debug "ModelKit digest: $MODELKIT_DIGEST"

# Construct full URI with digest
FULL_URI="${REGISTRY}/${REPOSITORY}@sha256:${MODELKIT_DIGEST}"

log_info "Push completed" "{\"uri\":\"$FULL_URI\"}"

# Create in-toto attestation predicate
ATTESTATION_PREDICATE=$(jq -nc \
    --arg uri "$FULL_URI" \
    --arg digest "sha256:$MODELKIT_DIGEST" \
    --arg dataset_uri "$DATASET_URI" \
    --arg code_repo "$CODE_REPO" \
    --arg code_commit "$CODE_COMMIT" \
    --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    '{
        predicateType: "https://kitops.ml/attestation/v1",
        predicate: {
            modelkit: {
                uri: $uri,
                digest: $digest
            },
            metadata: {
                dataset_uri: $dataset_uri,
                code_repo: $code_repo,
                code_commit: $code_commit,
                created_at: $timestamp
            }
        }
    }')

log_debug "Created attestation predicate"

# Sign with cosign (non-fatal)
if [ -f "/etc/cosign/cosign.key" ]; then
    log_info "Signing and attaching attestation"

    PREDICATE_FILE=$(mktemp)
    echo "$ATTESTATION_PREDICATE" > "$PREDICATE_FILE"

    if retry 3 2 cosign attest \
        --key /etc/cosign/cosign.key \
        --predicate "$PREDICATE_FILE" \
        --tlog-upload=false \
        --yes \
        "$FULL_URI" 2>&1; then
        log_info "Signed with cosign"
    else
        log_warn "Failed to sign with cosign, continuing"
    fi

    rm -f "$PREDICATE_FILE"
else
    log_warn "No cosign key found at /etc/cosign/cosign.key, skipping signing"
fi

# Output results
# Write to KFP output files
echo -n "$MODELKIT_URI" > /tmp/outputs/uri      # Tagged URI (e.g., jozu.ml/repo:tag)
echo -n "$FULL_URI" > /tmp/outputs/digest       # Digest URI (e.g., jozu.ml/repo@sha256:...)
+ +# Output JSON to stdout +jq -n \ + --arg uri "$FULL_URI" \ + --arg digest "sha256:$MODELKIT_DIGEST" \ + --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ + '{ + "uri": $uri, + "digest": $digest, + "timestamp": $timestamp, + "status": "success" + }' + +log_info "Push workflow completed" diff --git a/build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh b/build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh new file mode 100755 index 00000000..b3ee2146 --- /dev/null +++ b/build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -euo pipefail + +# Usage: /scripts/unpack-modelkit.sh [extract_path] +# Environment variables: `DOCKER_CONFIG` (path to .docker directory containing config.json) +# Unpacks ModelKit artifacts to a directory + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${SCRIPT_DIR}/lib/common.sh" + +# Validate arguments +if [ $# -lt 1 ]; then + die "Usage: $0 [extract_path]" +fi + +MODELKIT_URI="$1" +EXTRACT_PATH="${2:-/tmp/model}" + +log_info "Starting unpack" "{\"modelkit_uri\":\"$MODELKIT_URI\",\"extract_path\":\"$EXTRACT_PATH\"}" + +require_cmd kit jq +require_env DOCKER_CONFIG + +# Disable kit update notifications to keep output clean +kit version --show-update-notifications=false >/dev/null 2>&1 || true + +# Create output directory +mkdir -p /tmp/outputs +mkdir -p "$EXTRACT_PATH" + +# Step 1: Unpack ModelKit with retry +log_info "Unpacking" +retry 3 2 kit unpack "$MODELKIT_URI" -d "$EXTRACT_PATH" || die "Failed to unpack ModelKit" + +log_info "Unpacked successfully" "{\"path\":\"$EXTRACT_PATH\"}" + +# Output results +# Write to KFP output file +echo -n "$EXTRACT_PATH" > /tmp/outputs/model_path + +# Output JSON to stdout +jq -n \ + --arg path "$EXTRACT_PATH" \ + --arg uri "$MODELKIT_URI" \ + --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ + '{ + "model_path": $path, + "modelkit_uri": $uri, + "timestamp": $timestamp, + "status": "success" + }' + +log_info "Unpack workflow completed" diff --git a/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats b/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats new file mode 100755 index 00000000..93dd9796 --- /dev/null +++ b/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats @@ -0,0 +1,337 @@ +#!/usr/bin/env bats + +# Path to the script under test +SCRIPT_PATH="${BATS_TEST_DIRNAME}/../scripts/push-modelkit.sh" + +setup() { + # Create temporary directory for tests + export TEST_DIR="$(mktemp -d)" + export MODEL_DIR="$TEST_DIR/model" + export OUTPUT_DIR="/tmp/outputs" + export LOG_LEVEL="INFO" + export REQUEST_ID="test-push-modelkit" + export DOCKER_CONFIG="$TEST_DIR/.docker" + + # Create mock model directory with Kitfile + mkdir -p "$MODEL_DIR" + echo "mock model content" > "$MODEL_DIR/model.bin" + cat > "$MODEL_DIR/Kitfile" << 'KITFILEEOF' +manifestVersion: 1.0 +package: + name: test-model +model: + path: model.bin +KITFILEEOF + + # Create mock docker config + mkdir -p "$DOCKER_CONFIG" + cat > "$DOCKER_CONFIG/config.json" << 'DOCKEREOF' +{"auths":{"registry.io":{"auth":"TU9DS19VU0VSOk1PQ0tfUEFTU1dPUkQ="}}} # Mock auth is base64("MOCK_USER:MOCK_PASSWORD") +DOCKEREOF + + + # Create output directory + mkdir -p "$OUTPUT_DIR" + + # Mock commands + export PATH="$TEST_DIR/bin:$PATH" + mkdir -p "$TEST_DIR/bin" + + # Create mock kit command + cat > "$TEST_DIR/bin/kit" << 'EOF' +#!/bin/bash +# Mock kit command for testing + +if [[ "$1" == "version" ]]; then + echo "kitops version v1.0.0" + exit 0 +fi + +if [[ "$1" == 
"init" ]]; then + # Mock kit init - create a basic Kitfile + # Handle: kit init [--name NAME] [--desc DESC] [--author AUTHOR] + dir="$2" + shift 2 + # Consume optional flags + while [[ $# -gt 0 ]]; do + case $1 in + --name|--desc|--author) + shift 2 # Skip flag and value + ;; + *) + shift + ;; + esac + done + cat > "$dir/Kitfile" << 'INITEOF' +manifestVersion: 1.0 +package: + name: auto-generated +model: + path: model.bin +INITEOF + exit 0 +fi + +if [[ "$1" == "pack" ]]; then + # Mock pack output + echo "Packing model..." + exit 0 +fi + +if [[ "$1" == "push" ]]; then + # Mock push output with digest + echo "Pushed to registry" + echo "Digest: sha256:abc123def456789012345678901234567890123456789012345678901234" + exit 0 +fi + +if [[ "$1" == "inspect" ]]; then + # Mock inspect output + cat << 'INSPECTEOF' +{"digest":"sha256:abc123def456789012345678901234567890123456789012345678901234"} +INSPECTEOF + exit 0 +fi + +exit 1 +EOF + chmod +x "$TEST_DIR/bin/kit" + + # Create mock cosign command + cat > "$TEST_DIR/bin/cosign" << 'EOF' +#!/bin/bash +if [[ "$1" == "attest" ]]; then + echo "Signing attestation..." + exit 0 +fi +exit 1 +EOF + chmod +x "$TEST_DIR/bin/cosign" + + # Create mock jq command + cat > "$TEST_DIR/bin/jq" << 'EOF' +#!/bin/bash +exec /usr/bin/jq "$@" +EOF + chmod +x "$TEST_DIR/bin/jq" + + # Create failing kit command for error tests + cat > "$TEST_DIR/bin/kit-fail" << 'EOF' +#!/bin/bash +exit 1 +EOF + chmod +x "$TEST_DIR/bin/kit-fail" +} + +teardown() { + # Clean up temporary directory + rm -rf "$TEST_DIR" + rm -rf "$OUTPUT_DIR" + unset MODEL_DIR + unset OUTPUT_DIR + unset LOG_LEVEL + unset REQUEST_ID + unset DOCKER_CONFIG +} + +# Argument validation tests + +@test "fails when no arguments provided" { + run bash "$SCRIPT_PATH" + [ "$status" -eq 1 ] + [[ "$output" =~ "Usage:" ]] +} + +@test "fails when only registry provided" { + run bash "$SCRIPT_PATH" "registry.io" + [ "$status" -eq 1 ] + [[ "$output" =~ "Usage:" ]] +} + +@test "fails when only registry and repository provided" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" + [ "$status" -eq 1 ] + [[ "$output" =~ "Usage:" ]] +} + +@test "fails when no modelkit-dir specified" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" + [ "$status" -eq 1 ] + [[ "$output" =~ "Must specify --modelkit-dir" ]] +} + +# Directory mode tests + +@test "successfully packs and pushes from directory with Kitfile" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + [[ "$output" =~ "Using ModelKit directory" ]] + [[ "$output" =~ "Packing ModelKit" ]] + [[ "$output" =~ "Pushing to registry" ]] +} + +@test "runs kit init when no Kitfile present" { + # Remove Kitfile + rm "$MODEL_DIR/Kitfile" + + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + [[ "$output" =~ "No Kitfile found" ]] + [[ "$output" =~ "running kit init" ]] +} + +@test "recognizes lowercase kitfile" { + # Replace Kitfile with lowercase kitfile + mv "$MODEL_DIR/Kitfile" "$MODEL_DIR/kitfile" + + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + [[ ! "$output" =~ "No Kitfile found" ]] + [[ "$output" =~ "Packing ModelKit" ]] +} + +@test "recognizes dotfile .kitfile" { + # Replace Kitfile with .kitfile + mv "$MODEL_DIR/Kitfile" "$MODEL_DIR/.kitfile" + + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + [[ ! 
"$output" =~ "No Kitfile found" ]] + [[ "$output" =~ "Packing ModelKit" ]] +} + +@test "fails when directory does not exist" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "/nonexistent" + [ "$status" -eq 1 ] + [[ "$output" =~ "ModelKit directory not found" ]] +} + +@test "handles directory with spaces" { + model_with_spaces="$TEST_DIR/model with spaces" + mkdir -p "$model_with_spaces" + echo "mock" > "$model_with_spaces/model.bin" + cat > "$model_with_spaces/Kitfile" << 'EOF' +manifestVersion: 1.0 +model: + path: model.bin +EOF + + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$model_with_spaces" + [ "$status" -eq 0 ] +} + +@test "passes metadata to kit init when no Kitfile exists" { + # Remove Kitfile + rm "$MODEL_DIR/Kitfile" + + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" \ + --modelkit-dir "$MODEL_DIR" \ + --name "My Model" \ + --desc "Test model" \ + --author "Test Author" + [ "$status" -eq 0 ] + [[ "$output" =~ "running kit init" ]] +} + +# Output validation tests + +@test "creates output files in /tmp/outputs" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + [ -f "$OUTPUT_DIR/uri" ] + [ -f "$OUTPUT_DIR/digest" ] +} + +@test "output files contain correct values" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + + uri_content=$(cat "$OUTPUT_DIR/uri") + digest_content=$(cat "$OUTPUT_DIR/digest") + + [[ "$uri_content" =~ registry.io/myorg/mymodel@sha256: ]] + [[ "$digest_content" =~ sha256:abc123def456 ]] +} + +@test "returns valid JSON output" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + + # Extract final JSON output + json_output=$(echo "$output" | awk '/^{$/,/^}$/' | jq -s '.[] | select(.status != null)') + echo "$json_output" | jq -e '.uri' + echo "$json_output" | jq -e '.digest' + echo "$json_output" | jq -e '.status == "success"' +} + +# Attestation metadata tests + +@test "accepts attestation metadata flags" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" \ + --modelkit-dir "$MODEL_DIR" \ + --dataset-uri "s3://bucket/data" \ + --code-repo "github.com/org/repo" \ + --code-commit "abc123" + [ "$status" -eq 0 ] +} + +# Cosign signing tests + +@test "signs with cosign when key exists" { + mkdir -p /tmp/etc/cosign + echo "mock-key" > /tmp/etc/cosign/cosign.key + + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + + rm -rf /tmp/etc/cosign +} + +@test "warns when cosign key not found" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] + [[ "$output" =~ "No cosign key found" ]] || [[ "$output" =~ "skipping signing" ]] +} + +# Error handling tests + +@test "fails when kit command is not found" { + export PATH="/usr/bin:/bin" + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 1 ] + [[ "$output" =~ "Required command not found: kit" ]] +} + +@test "fails when DOCKER_CONFIG not set" { + unset DOCKER_CONFIG + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 1 ] + [[ "$output" =~ "Required environment variable not set: DOCKER_CONFIG" ]] +} + +@test "retries on push failure and eventually fails" { + # Replace kit with failing version + mv "$TEST_DIR/bin/kit" 
"$TEST_DIR/bin/kit.bak" + mv "$TEST_DIR/bin/kit-fail" "$TEST_DIR/bin/kit" + + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 1 ] +} + +# Edge case tests + +@test "handles registry with port number" { + run bash "$SCRIPT_PATH" "registry.io:5000" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] +} + +@test "handles repository with nested path" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/team/project/mymodel" "v1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] +} + +@test "handles tag with special characters" { + run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1.0.0-rc1" --modelkit-dir "$MODEL_DIR" + [ "$status" -eq 0 ] +} diff --git a/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats b/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats new file mode 100755 index 00000000..e5870b11 --- /dev/null +++ b/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats @@ -0,0 +1,188 @@ +#!/usr/bin/env bats + +# Path to the script under test +SCRIPT_PATH="${BATS_TEST_DIRNAME}/../scripts/unpack-modelkit.sh" + +setup() { + # Create temporary directory for tests + export TEST_DIR="$(mktemp -d)" + export EXTRACT_DIR="$TEST_DIR/extract" + export OUTPUT_DIR="/tmp/outputs" + export LOG_LEVEL="INFO" + export REQUEST_ID="test-unpack-modelkit" + export DOCKER_CONFIG="$TEST_DIR/.docker" + + # Create mock docker config + mkdir -p "$DOCKER_CONFIG" + cat > "$DOCKER_CONFIG/config.json" << 'DOCKEREOF' +{"auths":{"registry.io":{"auth":"TU9DS19VU0VSOk1PQ0tfUEFTU1dPUkQ="}}} # base64("MOCK_USER:MOCK_PASSWORD") +DOCKEREOF + + + # Create output directory + mkdir -p "$OUTPUT_DIR" + + # Mock commands + export PATH="$TEST_DIR/bin:$PATH" + mkdir -p "$TEST_DIR/bin" + + # Create mock kit command + cat > "$TEST_DIR/bin/kit" << 'EOF' +#!/bin/bash +# Mock kit command for testing + +if [[ "$1" == "version" ]]; then + echo "kitops version v1.0.0" + exit 0 +fi + +if [[ "$1" == "unpack" ]]; then + reference="$2" + # Parse -d flag for directory + shift 2 + while [[ $# -gt 0 ]]; do + case $1 in + -d) + dir="$2" + shift 2 + ;; + *) + shift + ;; + esac + done + + # Create mock unpacked content + mkdir -p "$dir" + echo "mock model content" > "$dir/model.bin" + echo "mock kitfile" > "$dir/Kitfile" + exit 0 +fi + +exit 1 +EOF + chmod +x "$TEST_DIR/bin/kit" + + # Create mock jq command + cat > "$TEST_DIR/bin/jq" << 'EOF' +#!/bin/bash +# Forward to real jq +exec /usr/bin/jq "$@" +EOF + chmod +x "$TEST_DIR/bin/jq" + + # Create failing kit command for error tests + cat > "$TEST_DIR/bin/kit-fail" << 'EOF' +#!/bin/bash +exit 1 +EOF + chmod +x "$TEST_DIR/bin/kit-fail" +} + +teardown() { + # Clean up temporary directory + rm -rf "$TEST_DIR" + rm -rf "$OUTPUT_DIR" + unset EXTRACT_DIR + unset OUTPUT_DIR + unset LOG_LEVEL + unset REQUEST_ID + unset DOCKER_CONFIG +} + +# Argument validation tests + +@test "fails when no arguments provided" { + run bash "$SCRIPT_PATH" + [ "$status" -eq 1 ] + [[ "$output" =~ "Usage:" ]] + [[ "$output" =~ "modelkit_uri" ]] +} + +@test "succeeds with only modelkit_uri (uses default extract path)" { + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" + [ "$status" -eq 0 ] + [[ "$output" =~ "Unpack workflow completed" ]] +} + +# ModelKit unpack tests + +@test "successfully unpacks modelkit to specified path" { + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" + [ "$status" -eq 0 ] + [[ "$output" =~ "Starting ModelKit unpack workflow" ]] + [[ "$output" =~ "Unpacking 
ModelKit" ]] + [ -f "$EXTRACT_DIR/model.bin" ] +} + +@test "creates output file in /tmp/outputs" { + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" + [ "$status" -eq 0 ] + [ -f "$OUTPUT_DIR/model_path" ] +} + +@test "output file contains correct value" { + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" + [ "$status" -eq 0 ] + + path_content=$(cat "$OUTPUT_DIR/model_path") + [[ "$path_content" == "$EXTRACT_DIR" ]] +} + +@test "returns valid JSON output" { + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" + [ "$status" -eq 0 ] + + # Extract final JSON output (the one with "status" field) + json_output=$(echo "$output" | awk '/^{$/,/^}$/' | jq -s '.[] | select(.status != null)') + echo "$json_output" | jq -e '.model_path' + echo "$json_output" | jq -e '.modelkit_uri' + echo "$json_output" | jq -e '.status == "success"' +} + +# Error handling tests + +@test "fails when kit command is not found" { + export PATH="/usr/bin:/bin" + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" + [ "$status" -eq 1 ] + [[ "$output" =~ "Required command not found: kit" ]] +} + +@test "fails when DOCKER_CONFIG not set" { + unset DOCKER_CONFIG + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" + [ "$status" -eq 1 ] + [[ "$output" =~ "Required environment variable not set: DOCKER_CONFIG" ]] +} + +@test "retries on unpack failure and eventually fails" { + # Replace kit with failing version + mv "$TEST_DIR/bin/kit" "$TEST_DIR/bin/kit.bak" + mv "$TEST_DIR/bin/kit-fail" "$TEST_DIR/bin/kit" + + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" + [ "$status" -eq 1 ] +} + +# Edge case tests + +@test "handles paths with spaces" { + extract_with_spaces="$TEST_DIR/extract with spaces" + + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$extract_with_spaces" + [ "$status" -eq 0 ] + [ -d "$extract_with_spaces" ] + [ -f "$extract_with_spaces/model.bin" ] +} + +# Integration tests + +@test "creates extract directory if it does not exist" { + nonexistent="$TEST_DIR/nonexistent/deep/path" + + run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$nonexistent" + [ "$status" -eq 0 ] + [ -d "$nonexistent" ] + [ -f "$nonexistent/model.bin" ] +} From 027db5621dc9648168f3f4e167da368df132e113 Mon Sep 17 00:00:00 2001 From: Gorkem Ercan Date: Fri, 28 Nov 2025 13:22:08 -0500 Subject: [PATCH 2/5] Add workflows for kubeflow components testing and release Signed-off-by: Gorkem Ercan --- .../workflows/kubeflow-components-test.yaml | 72 +++++++++++++++++++ .github/workflows/next-container-build.yaml | 18 +++++ .github/workflows/platform-release.yaml | 26 +++++++ 3 files changed, 116 insertions(+) create mode 100644 .github/workflows/kubeflow-components-test.yaml diff --git a/.github/workflows/kubeflow-components-test.yaml b/.github/workflows/kubeflow-components-test.yaml new file mode 100644 index 00000000..3f25086b --- /dev/null +++ b/.github/workflows/kubeflow-components-test.yaml @@ -0,0 +1,72 @@ +name: Kubeflow Components Tests + +on: + pull_request: + paths: + - 'build/dockerfiles/kubeflow-components/**' + - '.github/workflows/kubeflow-components-test.yaml' + push: + branches: + - main + paths: + - 'build/dockerfiles/kubeflow-components/**' + - '.github/workflows/kubeflow-components-test.yaml' + +jobs: + test-scripts: + name: Run BATS tests + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + + - name: Install BATS + run: | 
+ sudo apt-get update + sudo apt-get install -y bats + + - name: Install jq + run: | + sudo apt-get install -y jq + + - name: Run push-modelkit tests + working-directory: build/dockerfiles/kubeflow-components + run: bats tests/push-modelkit.bats + + - name: Run unpack-modelkit tests + working-directory: build/dockerfiles/kubeflow-components + run: bats tests/unpack-modelkit.bats + + test-container-build: + name: Test container build + runs-on: ubuntu-latest + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 + + - name: Checkout + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + + - name: Build base kit container (for build-arg) + id: build-kit-base + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + platforms: linux/amd64 + push: false + context: . + file: build/dockerfiles/Dockerfile + load: true + tags: ghcr.io/kitops-ml/kitops:test + + - name: Check kubeflow components container build + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + platforms: linux/amd64,linux/arm64 + push: false + context: build/dockerfiles/kubeflow-components + file: build/dockerfiles/kubeflow-components/Dockerfile + build-args: | + KIT_BASE_IMAGE=ghcr.io/kitops-ml/kitops:test diff --git a/.github/workflows/next-container-build.yaml b/.github/workflows/next-container-build.yaml index 59133522..bfbe6466 100644 --- a/.github/workflows/next-container-build.yaml +++ b/.github/workflows/next-container-build.yaml @@ -12,6 +12,7 @@ env: IMAGE_NAME: ${{ github.repository }} INIT_IMAGE_NAME: ${{ github.repository }}-init KIT_SERVE_IMAGE: ${{ github.repository }}-kserve + KUBEFLOW_IMAGE: ${{ github.repository }}-kubeflow NEXT_TAG: next permissions: @@ -90,3 +91,20 @@ jobs: index:org.opencontainers.image.description=KitOps KServe container index:org.opencontainers.image.source=https://github.com/kitops-ml/kitops index:org.opencontainers.image.licenses=Apache-2.0 + + - name: Build and push Kubeflow Pipeline components container + id: build-kubeflow-container + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + platforms: linux/amd64,linux/arm64 + push: true + context: build/dockerfiles/kubeflow-components + file: build/dockerfiles/kubeflow-components/Dockerfile + build-args: | + KIT_BASE_IMAGE=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ steps.build-kit-container.outputs.digest }} + tags: | + ${{ env.REGISTRY }}/${{ env.KUBEFLOW_IMAGE }}:${{ env.NEXT_TAG }} + annotations: | + index:org.opencontainers.image.description=KitOps Kubeflow Pipeline Components + index:org.opencontainers.image.source=https://github.com/kitops-ml/kitops + index:org.opencontainers.image.licenses=Apache-2.0 diff --git a/.github/workflows/platform-release.yaml b/.github/workflows/platform-release.yaml index 4cc834e0..9a9588c8 100644 --- a/.github/workflows/platform-release.yaml +++ b/.github/workflows/platform-release.yaml @@ -21,6 +21,7 @@ env: IMAGE_NAME: ${{ github.repository }} INIT_IMAGE_NAME: ${{ github.repository }}-init KIT_SERVE_IMAGE: ${{ github.repository }}-kserve + KUBEFLOW_IMAGE: ${{ github.repository }}-kubeflow permissions: contents: write @@ -431,6 +432,24 @@ jobs: index:org.opencontainers.image.source=https://github.com/kitops-ml/kitops index:org.opencontainers.image.licenses=Apache-2.0 + - name: Build and 
push Kubeflow Pipeline components container + id: build-kubeflow-container + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + platforms: linux/amd64,linux/arm64 + push: true + context: build/dockerfiles/kubeflow-components + file: build/dockerfiles/kubeflow-components/Dockerfile + build-args: | + KIT_BASE_IMAGE=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ steps.build-kit-container.outputs.digest }} + tags: | + ${{ env.REGISTRY }}/${{ env.KUBEFLOW_IMAGE }}:latest + ${{ env.REGISTRY }}/${{ env.KUBEFLOW_IMAGE }}:${{ github.ref_name }} + annotations: | + index:org.opencontainers.image.description=KitOps Kubeflow Pipeline Components + index:org.opencontainers.image.source=https://github.com/kitops-ml/kitops + index:org.opencontainers.image.licenses=Apache-2.0 + - name: Generate artifact attestation for base container uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0 with: @@ -451,3 +470,10 @@ jobs: subject-name: ${{ env.REGISTRY }}/${{ env.KIT_SERVE_IMAGE }} subject-digest: ${{ steps.build-kit-serve-container.outputs.digest }} push-to-registry: true + + - name: Generate artifact attestation for kubeflow container + uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0 + with: + subject-name: ${{ env.REGISTRY }}/${{ env.KUBEFLOW_IMAGE }} + subject-digest: ${{ steps.build-kubeflow-container.outputs.digest }} + push-to-registry: true From 4844d9797eba2868c5315270fa78d9d08d3b449c Mon Sep 17 00:00:00 2001 From: Gorkem Ercan Date: Fri, 28 Nov 2025 13:33:10 -0500 Subject: [PATCH 3/5] Fix failing CI jobs Signed-off-by: Gorkem Ercan --- .github/workflows/kubeflow-components-test.yaml | 13 +------------ build/dockerfiles/kubeflow-components/README.md | 12 ++++++------ .../components/push-modelkit/component.yaml | 6 +++--- .../components/unpack-modelkit/component.yaml | 2 +- .../examples/house-prices-pipeline.py | 5 +---- .../kubeflow-components/tests/push-modelkit.bats | 4 ++-- .../kubeflow-components/tests/unpack-modelkit.bats | 4 ++-- 7 files changed, 16 insertions(+), 30 deletions(-) diff --git a/.github/workflows/kubeflow-components-test.yaml b/.github/workflows/kubeflow-components-test.yaml index 3f25086b..ab1c2fb2 100644 --- a/.github/workflows/kubeflow-components-test.yaml +++ b/.github/workflows/kubeflow-components-test.yaml @@ -50,17 +50,6 @@ jobs: - name: Checkout uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 - - name: Build base kit container (for build-arg) - id: build-kit-base - uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 - with: - platforms: linux/amd64 - push: false - context: . - file: build/dockerfiles/Dockerfile - load: true - tags: ghcr.io/kitops-ml/kitops:test - - name: Check kubeflow components container build uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 with: @@ -69,4 +58,4 @@ jobs: context: build/dockerfiles/kubeflow-components file: build/dockerfiles/kubeflow-components/Dockerfile build-args: | - KIT_BASE_IMAGE=ghcr.io/kitops-ml/kitops:test + KIT_BASE_IMAGE=ghcr.io/kitops-ml/kitops:next diff --git a/build/dockerfiles/kubeflow-components/README.md b/build/dockerfiles/kubeflow-components/README.md index 546151f5..bd926cbf 100644 --- a/build/dockerfiles/kubeflow-components/README.md +++ b/build/dockerfiles/kubeflow-components/README.md @@ -31,8 +31,8 @@ If a `Kitfile` exists in `modelkit_dir`, it is used as-is. 
Otherwise, one is aut **Outputs** -- `uri` – Full ModelKit URI with digest (e.g., `registry.io/myorg/mymodel@sha256:abc…`) -- `digest` – ModelKit digest (e.g., `sha256:abc…`) +- `uri` – Tagged ModelKit URI (e.g., `registry.io/myorg/mymodel:v1`) +- `digest` – Digest-based ModelKit URI (e.g., `registry.io/myorg/mymodel@sha256:abc…`) ### unpack-modelkit @@ -99,7 +99,7 @@ def push_modelkit( code_commit: str = '', ): return dsl.ContainerSpec( - image='ghcr.io/kitops-ml/kubeflow:latest', + image='ghcr.io/kitops-ml/kitops-kubeflow:latest', command=['/bin/bash', '-c'], args=[ f'/scripts/push-modelkit.sh ' @@ -144,7 +144,7 @@ def simple_pipeline( kubernetes.use_secret_as_volume( push, secret_name='docker-config', - mount_path='/etc/docker-config', + mount_path='/home/user/.docker', ) ``` @@ -216,7 +216,7 @@ def production_pipeline( kubernetes.use_secret_as_volume( push, secret_name='docker-config', - mount_path='/etc/docker-config', + mount_path='/home/user/.docker', ) kubernetes.use_secret_as_volume( push, @@ -254,7 +254,7 @@ Mount in your pipeline (as shown above) using: kubernetes.use_secret_as_volume( push, secret_name='docker-config', - mount_path='/etc/docker-config', + mount_path='/home/user/.docker', ) ``` diff --git a/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml b/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml index 400f7f70..69406828 100644 --- a/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml +++ b/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml @@ -17,12 +17,12 @@ inputs: - {name: code_commit, type: String, optional: true, description: 'Code commit hash'} outputs: - - {name: uri, type: String, description: 'Full ModelKit URI with digest'} - - {name: digest, type: String, description: 'ModelKit digest (sha256:...)'} + - {name: uri, type: String, description: 'Tagged ModelKit URI (e.g., registry.io/repo:tag)'} + - {name: digest, type: String, description: 'Digest-based ModelKit URI (e.g., registry.io/repo@sha256:...)'} implementation: container: - image: kubeflow:dev + image: ghcr.io/kitops-ml/kitops-kubeflow:latest command: - /bin/bash - -c diff --git a/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml b/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml index d0db3593..f66ced35 100644 --- a/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml +++ b/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml @@ -10,7 +10,7 @@ outputs: implementation: container: - image: ghcr.io/kitops-ml/kubeflow:latest + image: ghcr.io/kitops-ml/kitops-kubeflow:latest command: - /bin/bash - /scripts/unpack-modelkit.sh diff --git a/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py index d6913d34..19513f49 100644 --- a/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py +++ b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py @@ -9,8 +9,6 @@ """ from kfp import dsl, kubernetes -from typing import NamedTuple - @dsl.component( packages_to_install=['pandas', 'xgboost', 'scikit-learn'], @@ -134,7 +132,7 @@ def push_modelkit( """ # Build command using safe argument passing return dsl.ContainerSpec( - image='kubeflow:dev', + image='ghcr.io/kitops-ml/kitops-kubeflow:latest', command=['/bin/bash', '-c'], args=[ ''' @@ -226,7 +224,6 @@ def 
house_prices_pipeline( print(f"Using KFP version: {kfp_version}") if kfp_version.startswith('2.'): - # KFP v2 - compile with v1 compatibility from kfp import compiler compiler.Compiler().compile( pipeline_func=house_prices_pipeline, diff --git a/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats b/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats index 93dd9796..a32e4287 100755 --- a/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats +++ b/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats @@ -250,8 +250,8 @@ EOF uri_content=$(cat "$OUTPUT_DIR/uri") digest_content=$(cat "$OUTPUT_DIR/digest") - [[ "$uri_content" =~ registry.io/myorg/mymodel@sha256: ]] - [[ "$digest_content" =~ sha256:abc123def456 ]] + [[ "$uri_content" == "registry.io/myorg/mymodel:v1" ]] + [[ "$digest_content" =~ registry.io/myorg/mymodel@sha256:abc123def456 ]] } @test "returns valid JSON output" { diff --git a/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats b/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats index e5870b11..e38bcc32 100755 --- a/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats +++ b/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats @@ -110,8 +110,8 @@ teardown() { @test "successfully unpacks modelkit to specified path" { run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" "$EXTRACT_DIR" [ "$status" -eq 0 ] - [[ "$output" =~ "Starting ModelKit unpack workflow" ]] - [[ "$output" =~ "Unpacking ModelKit" ]] + [[ "$output" =~ "Starting unpack" ]] + [[ "$output" =~ "Unpacking" ]] [ -f "$EXTRACT_DIR/model.bin" ] } From d2300ffe65e206c18d348e8354e08a6cd693073d Mon Sep 17 00:00:00 2001 From: Gorkem Ercan Date: Tue, 2 Dec 2025 09:19:13 -0500 Subject: [PATCH 4/5] Add KServe inference example for Kubeflow ModelKit Includes InferenceService manifest demonstrating kit:// URI integration with KServe for deploying ModelKits. Signed-off-by: Gorkem Ercan --- .../examples/house-prices-inference-service.yaml | 14 ++++++++++++++ .../examples/house-prices-pipeline.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 build/dockerfiles/kubeflow-components/examples/house-prices-inference-service.yaml diff --git a/build/dockerfiles/kubeflow-components/examples/house-prices-inference-service.yaml b/build/dockerfiles/kubeflow-components/examples/house-prices-inference-service.yaml new file mode 100644 index 00000000..d0f119a8 --- /dev/null +++ b/build/dockerfiles/kubeflow-components/examples/house-prices-inference-service.yaml @@ -0,0 +1,14 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: house-prices-model + namespace: modelkit-demo + annotations: + serving.kserve.io/deploymentMode: RawDeployment +spec: + predictor: + model: + modelFormat: + name: sklearn + storageUri: kit://jozu.ml/demo/house-prices:latest + protocolVersion: v2 \ No newline at end of file diff --git a/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py index 19513f49..47b55fcc 100644 --- a/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py +++ b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py @@ -5,7 +5,7 @@ creates a directory with well-named files (model.pkl, predictions.csv, train.py, README.md) and the push-modelkit component packages the entire directory as a ModelKit. -Uses pure KFP v2.14.3 components without v1 compatibility. 
+Uses KFP v2.14.3 """ from kfp import dsl, kubernetes From f1da0a899774b1499bd8b57f8b2f88d8bb7e3241 Mon Sep 17 00:00:00 2001 From: Gorkem Ercan Date: Mon, 8 Dec 2025 19:31:48 -0500 Subject: [PATCH 5/5] Refactor Kubeflow components to use reference instead of uri Update component inputs, outputs, and scripts to use reference instead of uri for OCI alignment. Also remove redundant CI triggers and dependencies. Signed-off-by: Gorkem Ercan --- .../workflows/kubeflow-components-test.yaml | 10 --- .../dockerfiles/kubeflow-components/README.md | 10 +-- .../components/push-modelkit/component.yaml | 6 +- .../components/unpack-modelkit/component.yaml | 4 +- .../examples/house-prices-pipeline.py | 10 +-- .../kubeflow-components/scripts/lib/common.sh | 4 +- .../scripts/push-modelkit.sh | 65 +++++++++---------- .../scripts/unpack-modelkit.sh | 14 ++-- .../tests/push-modelkit.bats | 15 ++--- .../tests/unpack-modelkit.bats | 6 +- 10 files changed, 60 insertions(+), 84 deletions(-) diff --git a/.github/workflows/kubeflow-components-test.yaml b/.github/workflows/kubeflow-components-test.yaml index ab1c2fb2..240ccf62 100644 --- a/.github/workflows/kubeflow-components-test.yaml +++ b/.github/workflows/kubeflow-components-test.yaml @@ -5,12 +5,6 @@ on: paths: - 'build/dockerfiles/kubeflow-components/**' - '.github/workflows/kubeflow-components-test.yaml' - push: - branches: - - main - paths: - - 'build/dockerfiles/kubeflow-components/**' - - '.github/workflows/kubeflow-components-test.yaml' jobs: test-scripts: @@ -25,10 +19,6 @@ jobs: sudo apt-get update sudo apt-get install -y bats - - name: Install jq - run: | - sudo apt-get install -y jq - - name: Run push-modelkit tests working-directory: build/dockerfiles/kubeflow-components run: bats tests/push-modelkit.bats diff --git a/build/dockerfiles/kubeflow-components/README.md b/build/dockerfiles/kubeflow-components/README.md index bd926cbf..82bf9119 100644 --- a/build/dockerfiles/kubeflow-components/README.md +++ b/build/dockerfiles/kubeflow-components/README.md @@ -31,8 +31,8 @@ If a `Kitfile` exists in `modelkit_dir`, it is used as-is. Otherwise, one is aut **Outputs** -- `uri` – Tagged ModelKit URI (e.g., `registry.io/myorg/mymodel:v1`) -- `digest` – Digest-based ModelKit URI (e.g., `registry.io/myorg/mymodel@sha256:abc…`) +- `ref` – Tagged ModelKit reference (e.g., `registry.io/myorg/mymodel:v1`) +- `digest` – Digest-based ModelKit reference (e.g., `registry.io/myorg/mymodel@sha256:abc…`) ### unpack-modelkit @@ -40,7 +40,7 @@ Pulls a ModelKit from a registry and extracts it. 
**Inputs**
 
-- `modelkit_uri` – ModelKit reference (e.g., `registry.io/repo:tag` or `registry.io/repo@sha256:…`)
+- `modelkit_reference` – ModelKit reference (e.g., `registry.io/repo:tag` or `registry.io/repo@sha256:…`)
 - `extract_path` – Directory to extract contents (default: `/tmp/model`)
 
 **Outputs**
@@ -89,7 +89,7 @@ def push_modelkit(
     repository: str,
     tag: str,
     input_modelkit_dir: dsl.Input[dsl.Artifact],
-    output_uri: dsl.Output[dsl.Artifact],
+    output_ref: dsl.Output[dsl.Artifact],
     output_digest: dsl.Output[dsl.Artifact],
     modelkit_name: str = '',
     modelkit_desc: str = '',
@@ -111,7 +111,7 @@ def push_modelkit(
             f'--dataset-uri "{dataset_uri}" '
             f'--code-repo "{code_repo}" '
             f'--code-commit "{code_commit}" '
-            f'&& cp /tmp/outputs/uri "{output_uri.path}" '
+            f'&& cp /tmp/outputs/reference "{output_ref.path}" '
             f'&& cp /tmp/outputs/digest "{output_digest.path}"'
         ],
     )
diff --git a/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml b/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml
index 69406828..238edf29 100644
--- a/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml
+++ b/build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml
@@ -17,8 +17,8 @@ inputs:
   - {name: code_commit, type: String, optional: true, description: 'Code commit hash'}
 
 outputs:
-  - {name: uri, type: String, description: 'Tagged ModelKit URI (e.g., registry.io/repo:tag)'}
-  - {name: digest, type: String, description: 'Digest-based ModelKit URI (e.g., registry.io/repo@sha256:...)'}
+  - {name: ref, type: String, description: 'Tagged ModelKit reference (e.g., registry.io/repo:tag)'}
+  - {name: digest, type: String, description: 'Digest-based ModelKit reference (e.g., registry.io/repo@sha256:...)'}
 
 implementation:
   container:
@@ -47,5 +47,5 @@ implementation:
       - {inputValue: code_repo}
       - {inputValue: code_commit}
     fileOutputs:
-      uri: /tmp/outputs/uri
+      ref: /tmp/outputs/reference
       digest: /tmp/outputs/digest
diff --git a/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml b/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml
index f66ced35..f4faf93f 100644
--- a/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml
+++ b/build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml
@@ -2,7 +2,7 @@ name: unpack-modelkit
 description: Unpack ModelKit artifacts from a registry
 
 inputs:
-  - {name: modelkit_uri, type: String, description: 'ModelKit reference (e.g., registry.io/repo:tag or registry.io/repo@sha256:...)'}
+  - {name: modelkit_reference, type: String, description: 'ModelKit reference (e.g., registry.io/repo:tag or registry.io/repo@sha256:...)'}
   - {name: extract_path, type: String, default: '/tmp/model', description: 'Directory to extract ModelKit artifacts'}
 
 outputs:
@@ -14,7 +14,7 @@ implementation:
     command:
       - /bin/bash
       - /scripts/unpack-modelkit.sh
-      - {inputValue: modelkit_uri}
+      - {inputValue: modelkit_reference}
       - {inputValue: extract_path}
     fileOutputs:
       model_path: /tmp/outputs/model_path
diff --git a/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py
index 47b55fcc..c0d12878 100644
--- a/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py
+++ b/build/dockerfiles/kubeflow-components/examples/house-prices-pipeline.py
@@ -114,7 +114,7 @@ def push_modelkit(
     registry: str,
     repository: str,
     tag: str,
-    
output_uri: dsl.Output[dsl.Artifact], + output_ref: dsl.Output[dsl.Artifact], output_digest: dsl.Output[dsl.Artifact], input_modelkit_dir: dsl.Input[dsl.Artifact], modelkit_name: str = '', @@ -127,8 +127,8 @@ def push_modelkit( """Package and push model as ModelKit with attestation. Outputs: - output_uri: Tagged URI (e.g., jozu.ml/repo:tag) - output_digest: Digest URI (e.g., jozu.ml/repo@sha256:...) + output_ref: Tagged reference (e.g., jozu.ml/repo:tag) + output_digest: Digest reference (e.g., jozu.ml/repo@sha256:...) """ # Build command using safe argument passing return dsl.ContainerSpec( @@ -146,7 +146,7 @@ def push_modelkit( ${7:+--dataset-uri "$7"} \ ${8:+--code-repo "$8"} \ ${9:+--code-commit "$9"} \ - && cp /tmp/outputs/uri "${10}" \ + && cp /tmp/outputs/reference "${10}" \ && cp /tmp/outputs/digest "${11}" ''', registry, @@ -159,7 +159,7 @@ def push_modelkit( dataset_uri, code_repo, code_commit, - output_uri.path, + output_ref.path, output_digest.path ] ) diff --git a/build/dockerfiles/kubeflow-components/scripts/lib/common.sh b/build/dockerfiles/kubeflow-components/scripts/lib/common.sh index 2ce60d37..bb5df0de 100644 --- a/build/dockerfiles/kubeflow-components/scripts/lib/common.sh +++ b/build/dockerfiles/kubeflow-components/scripts/lib/common.sh @@ -69,8 +69,8 @@ die() { # Retry logic retry() { - local max_attempts=${1:-3} - local delay=${2:-2} + local max_attempts=$1 + local delay=$2 shift 2 local attempt=1 diff --git a/build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh b/build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh index f86c383b..fcf78409 100755 --- a/build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh +++ b/build/dockerfiles/kubeflow-components/scripts/push-modelkit.sh @@ -91,13 +91,12 @@ if [ ! -d "$MODELKIT_DIR" ]; then die "ModelKit directory not found: $MODELKIT_DIR" fi -# Construct ModelKit URI -MODELKIT_URI="${REGISTRY}/${REPOSITORY}:${TAG}" +# Construct ModelKit reference +MODELKIT_REF="${REGISTRY}/${REPOSITORY}:${TAG}" -log_info "Starting ModelKit push" "{\"uri\":\"$MODELKIT_URI\"}" +log_info "Starting ModelKit push" "{\"reference\":\"$MODELKIT_REF\"}" require_cmd kit cosign jq -require_env DOCKER_CONFIG # Disable kit update notifications kit version --show-update-notifications=false >/dev/null 2>&1 || true @@ -126,47 +125,41 @@ fi # Pack the ModelKit log_info "Packing ModelKit artifacts" -kit pack "$WORK_DIR" -t "$MODELKIT_URI" || die "Failed to pack ModelKit" +kit pack "$WORK_DIR" -t "$MODELKIT_REF" || die "Failed to pack ModelKit" # Push to registry with retry log_info "Pushing to registry" -retry 3 2 kit push "$MODELKIT_URI" || die "Failed to push ModelKit" +retry 3 2 kit push "$MODELKIT_REF" || die "Failed to push ModelKit" -# Extract digest from kit inspect -log_debug "Extracting digest" -MODELKIT_DIGEST=$(echo "$MODELKIT_URI" | grep -oE '@sha256:[a-f0-9]+' | sed 's/@sha256://' || echo "") +# Fetch digest from registry +log_debug "Fetching digest from registry" -if [ -z "$MODELKIT_DIGEST" ]; then - log_debug "No digest in URI, fetching from registry" - - set +e - INSPECT_OUTPUT=$(kit inspect "$MODELKIT_URI" --remote 2>&1) - INSPECT_EXIT_CODE=$? - set -e +set +e +INSPECT_OUTPUT=$(kit inspect "$MODELKIT_REF" --remote 2>&1) +INSPECT_EXIT_CODE=$? 
+set -e
 
-    log_debug "Kit inspect completed" "{\"exit_code\":$INSPECT_EXIT_CODE}"
+log_debug "Kit inspect completed" "{\"exit_code\":$INSPECT_EXIT_CODE}"
 
-    if [ $INSPECT_EXIT_CODE -eq 0 ]; then
-        # Extract digest from JSON output, filtering out any log lines
-        MODELKIT_DIGEST=$(echo "$INSPECT_OUTPUT" | grep -v '^{"timestamp"' | jq -r '.digest' 2>/dev/null | sed 's/sha256://' || echo "")
-    fi
+if [ $INSPECT_EXIT_CODE -eq 0 ]; then
+    MODELKIT_DIGEST=$(echo "$INSPECT_OUTPUT" | jq -r '.digest' 2>/dev/null || echo "")
+fi
 
-    if [ -z "$MODELKIT_DIGEST" ]; then
-        die "Could not determine ModelKit digest" "{\"reference\":\"$MODELKIT_URI\",\"exit_code\":$INSPECT_EXIT_CODE}"
-    fi
+if [ -z "${MODELKIT_DIGEST:-}" ]; then
+    die "Could not determine ModelKit digest" "{\"reference\":\"$MODELKIT_REF\",\"exit_code\":$INSPECT_EXIT_CODE}"
 fi
 
 log_debug "ModelKit digest: $MODELKIT_DIGEST"
 
-# Construct full URI with digest
-FULL_URI="${REGISTRY}/${REPOSITORY}@sha256:${MODELKIT_DIGEST}"
+# Construct full reference with digest
+FULL_REF="${REGISTRY}/${REPOSITORY}@${MODELKIT_DIGEST}"
 
-log_info "Push completed" "{\"uri\":\"$FULL_URI\"}"
+log_info "Push completed" "{\"reference\":\"$FULL_REF\"}"
 
 # Create in-toto attestation predicate
 ATTESTATION_PREDICATE=$(jq -nc \
-    --arg uri "$FULL_URI" \
-    --arg digest "sha256:$MODELKIT_DIGEST" \
+    --arg reference "$FULL_REF" \
+    --arg digest "$MODELKIT_DIGEST" \
     --arg dataset_uri "$DATASET_URI" \
     --arg code_repo "$CODE_REPO" \
     --arg code_commit "$CODE_COMMIT" \
@@ -175,7 +168,7 @@ ATTESTATION_PREDICATE=$(jq -nc \
         predicateType: "https://kitops.ml/attestation/v1",
         predicate: {
             modelkit: {
-                uri: $uri,
+                reference: $reference,
                 digest: $digest
             },
             metadata: {
@@ -201,7 +194,7 @@ if [ -f "/etc/cosign/cosign.key" ]; then
             --predicate "$PREDICATE_FILE" \
             --tlog-upload=false \
             --yes \
-            "$FULL_URI" 2>&1; then
+            "$FULL_REF" 2>&1; then
             log_info "Signed with cosign"
         else
             log_warn "Failed to sign with cosign, continuing"
@@ -214,16 +207,16 @@ fi
 
 # Output results
 
 # Write to KFP output files
-echo -n "$MODELKIT_URI" > /tmp/outputs/uri        # Tagged URI (e.g., jozu.ml/repo:tag)
-echo -n "$FULL_URI" > /tmp/outputs/digest         # Digest URI (e.g., jozu.ml/repo@sha256:...)
+echo -n "$MODELKIT_REF" > /tmp/outputs/reference  # Tagged reference (e.g., jozu.ml/repo:tag)
+echo -n "$FULL_REF" > /tmp/outputs/digest         # Digest reference (e.g., jozu.ml/repo@sha256:...)
# Output JSON to stdout jq -n \ - --arg uri "$FULL_URI" \ - --arg digest "sha256:$MODELKIT_DIGEST" \ + --arg reference "$FULL_REF" \ + --arg digest "$MODELKIT_DIGEST" \ --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ '{ - "uri": $uri, + "reference": $reference, "digest": $digest, "timestamp": $timestamp, "status": "success" diff --git a/build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh b/build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh index b3ee2146..c8f4269a 100755 --- a/build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh +++ b/build/dockerfiles/kubeflow-components/scripts/unpack-modelkit.sh @@ -1,7 +1,7 @@ #!/bin/bash set -euo pipefail -# Usage: /scripts/unpack-modelkit.sh [extract_path] +# Usage: /scripts/unpack-modelkit.sh [extract_path] # Environment variables: `DOCKER_CONFIG` (path to .docker directory containing config.json) # Unpacks ModelKit artifacts to a directory @@ -10,13 +10,13 @@ source "${SCRIPT_DIR}/lib/common.sh" # Validate arguments if [ $# -lt 1 ]; then - die "Usage: $0 [extract_path]" + die "Usage: $0 [extract_path]" fi -MODELKIT_URI="$1" +MODELKIT_REF="$1" EXTRACT_PATH="${2:-/tmp/model}" -log_info "Starting unpack" "{\"modelkit_uri\":\"$MODELKIT_URI\",\"extract_path\":\"$EXTRACT_PATH\"}" +log_info "Starting unpack" "{\"modelkit_reference\":\"$MODELKIT_REF\",\"extract_path\":\"$EXTRACT_PATH\"}" require_cmd kit jq require_env DOCKER_CONFIG @@ -30,7 +30,7 @@ mkdir -p "$EXTRACT_PATH" # Step 1: Unpack ModelKit with retry log_info "Unpacking" -retry 3 2 kit unpack "$MODELKIT_URI" -d "$EXTRACT_PATH" || die "Failed to unpack ModelKit" +retry 3 2 kit unpack "$MODELKIT_REF" -d "$EXTRACT_PATH" || die "Failed to unpack ModelKit" log_info "Unpacked successfully" "{\"path\":\"$EXTRACT_PATH\"}" @@ -41,11 +41,11 @@ echo -n "$EXTRACT_PATH" > /tmp/outputs/model_path # Output JSON to stdout jq -n \ --arg path "$EXTRACT_PATH" \ - --arg uri "$MODELKIT_URI" \ + --arg reference "$MODELKIT_REF" \ --arg timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \ '{ "model_path": $path, - "modelkit_uri": $uri, + "modelkit_reference": $reference, "timestamp": $timestamp, "status": "success" }' diff --git a/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats b/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats index a32e4287..5f1fd5c3 100755 --- a/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats +++ b/build/dockerfiles/kubeflow-components/tests/push-modelkit.bats @@ -239,7 +239,7 @@ EOF @test "creates output files in /tmp/outputs" { run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" [ "$status" -eq 0 ] - [ -f "$OUTPUT_DIR/uri" ] + [ -f "$OUTPUT_DIR/reference" ] [ -f "$OUTPUT_DIR/digest" ] } @@ -247,10 +247,10 @@ EOF run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" [ "$status" -eq 0 ] - uri_content=$(cat "$OUTPUT_DIR/uri") + ref_content=$(cat "$OUTPUT_DIR/reference") digest_content=$(cat "$OUTPUT_DIR/digest") - [[ "$uri_content" == "registry.io/myorg/mymodel:v1" ]] + [[ "$ref_content" == "registry.io/myorg/mymodel:v1" ]] [[ "$digest_content" =~ registry.io/myorg/mymodel@sha256:abc123def456 ]] } @@ -260,7 +260,7 @@ EOF # Extract final JSON output json_output=$(echo "$output" | awk '/^{$/,/^}$/' | jq -s '.[] | select(.status != null)') - echo "$json_output" | jq -e '.uri' + echo "$json_output" | jq -e '.reference' echo "$json_output" | jq -e '.digest' echo "$json_output" | jq -e '.status == "success"' } @@ -303,13 +303,6 @@ EOF [[ "$output" =~ "Required 
command not found: kit" ]] } -@test "fails when DOCKER_CONFIG not set" { - unset DOCKER_CONFIG - run bash "$SCRIPT_PATH" "registry.io" "myorg/mymodel" "v1" --modelkit-dir "$MODEL_DIR" - [ "$status" -eq 1 ] - [[ "$output" =~ "Required environment variable not set: DOCKER_CONFIG" ]] -} - @test "retries on push failure and eventually fails" { # Replace kit with failing version mv "$TEST_DIR/bin/kit" "$TEST_DIR/bin/kit.bak" diff --git a/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats b/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats index e38bcc32..ec7e2d5f 100755 --- a/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats +++ b/build/dockerfiles/kubeflow-components/tests/unpack-modelkit.bats @@ -96,10 +96,10 @@ teardown() { run bash "$SCRIPT_PATH" [ "$status" -eq 1 ] [[ "$output" =~ "Usage:" ]] - [[ "$output" =~ "modelkit_uri" ]] + [[ "$output" =~ "modelkit_reference" ]] } -@test "succeeds with only modelkit_uri (uses default extract path)" { +@test "succeeds with only modelkit_reference (uses default extract path)" { run bash "$SCRIPT_PATH" "registry.io/myorg/mymodel:v1" [ "$status" -eq 0 ] [[ "$output" =~ "Unpack workflow completed" ]] @@ -136,7 +136,7 @@ teardown() { # Extract final JSON output (the one with "status" field) json_output=$(echo "$output" | awk '/^{$/,/^}$/' | jq -s '.[] | select(.status != null)') echo "$json_output" | jq -e '.model_path' - echo "$json_output" | jq -e '.modelkit_uri' + echo "$json_output" | jq -e '.modelkit_reference' echo "$json_output" | jq -e '.status == "success"' }
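
Below is a minimal wiring sketch of the renamed interface, showing how the `ref`/`digest` outputs and the `modelkit_reference` input from this series fit together in a KFP v2 pipeline. The component file paths, registry, and repository values are illustrative assumptions, as is loading the v1-style `component.yaml` specs with `kfp.components.load_component_from_file`; none of this is part of the patches above.

```python
from kfp import dsl, components

# Assumed paths: the component specs introduced by this patch series,
# loaded from a local checkout of the repository.
push_modelkit = components.load_component_from_file(
    'build/dockerfiles/kubeflow-components/components/push-modelkit/component.yaml'
)
unpack_modelkit = components.load_component_from_file(
    'build/dockerfiles/kubeflow-components/components/unpack-modelkit/component.yaml'
)


@dsl.pipeline(name='push-then-unpack-modelkit')
def push_then_unpack(
    registry: str = 'jozu.ml',       # illustrative registry
    repository: str = 'team/model',  # illustrative repository
    tag: str = 'v1',
):
    push = push_modelkit(
        registry=registry,
        repository=repository,
        tag=tag,
        modelkit_dir='/workspace/model',  # assumed to be staged by an earlier task
    )
    # Consume the digest-based reference so the unpack step resolves the
    # exact ModelKit that was pushed, not a mutable tag.
    unpack_modelkit(
        modelkit_reference=push.outputs['digest'],
        extract_path='/tmp/model',
    )
```

Pinning by digest mirrors the two files the push script now writes: `/tmp/outputs/reference` carries the mutable `registry/repo:tag` form, while `/tmp/outputs/digest` carries the immutable `registry/repo@sha256:…` form.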