Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0.0
version: 1.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "2.0"
# This is the version number of the application being deployed. Keep this aligned
# with the operator image's MAJOR.MINOR version.
appVersion: "2.1"

dependencies:
- name: aws-mountpoint-s3-csi-driver
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ spec:
l2CacheBackend:
description: L2 cache backend type. Required when L2CacheSpec
is provided.
pattern: (?i)redis
pattern: (?i)redis|tieredstorage
type: string
l2CacheLocalUrl:
description: Provide the L2 cache URL to local storage
Expand All @@ -721,6 +721,12 @@ spec:
- round_robin
type: string
type: object
maxDeployTimeInSeconds:
default: 3600
description: Maximum allowed time in seconds for the deployment to
complete before timing out. Defaults to 1 hour (3600 seconds)
format: int32
type: integer
metrics:
description: Configuration for metrics collection and exposure
properties:
Expand Down Expand Up @@ -1617,12 +1623,6 @@ spec:
- round_robin
type: string
type: object
maxDeployTimeInSeconds:
default: 3600
description: Maximum allowed time in seconds for the deployment to
complete before timing out. Defaults to 1 hour (3600 seconds)
format: int32
type: integer
metrics:
description: Configuration for metrics collection and exposure
properties:
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,94 @@ spec:
# versions < 1.19 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ).
# seccompProfile:
# type: RuntimeDefault
initContainers:
- command:
- bash
- -lc
- |
set -euo pipefail

# Resolve the kubectl binary: prefer whatever is on PATH, otherwise probe a
# short list of well-known install locations and take the first executable.
KUBECTL="$(command -v kubectl || true)"
if [ -z "${KUBECTL}" ]; then
  for candidate in \
    /opt/bitnami/kubectl/bin/kubectl \
    /usr/local/bin/kubectl \
    /usr/bin/kubectl \
    /bin/kubectl
  do
    if [ -x "$candidate" ]; then
      KUBECTL="$candidate"
      break
    fi
  done
fi

# None of the checks can run without kubectl: record the reason in the
# termination log and stop with status 2.
if [ -z "${KUBECTL}" ]; then
  echo "kubectl not found in PATH or common locations" > /dev/termination-log
  exit 2
fi

# Space-separated list of checks to run. Falls back to "drivers crds" when
# CHECKS is unset or empty.
CHECKS="${CHECKS:-drivers crds}"

# log MESSAGE
# Writes MESSAGE ($1) to /dev/termination-log. The file is truncated on every
# call, so only the most recent message is kept.
log() { echo "$1" > /dev/termination-log; }

# require_csidriver PROVISIONER FRIENDLY_NAME
#   $1 - name of the CSIDriver object to look up (e.g. "s3.csi.aws.com")
#   $2 - human-readable driver name used in failure messages
# Returns 0 when the CSIDriver object can be read. Otherwise the reason is
# written via log and the script exits: status 2 when kubectl's output looks
# like an authorization failure, status 1 when the driver appears to be missing.
require_csidriver() {
  local provisioner="$1"
  local friendly="$2"
  # Declared local so the captured error text does not leak into global scope.
  local err_msg

  # Quiet probe first: succeed if either spelling of the resource resolves.
  if "${KUBECTL}" get csidriver "$provisioner" >/dev/null 2>&1 || \
     "${KUBECTL}" get csidrivers.storage.k8s.io "$provisioner" >/dev/null 2>&1; then
    return 0
  fi

  # Re-run to capture the real error text so an RBAC failure can be told
  # apart from a driver that is genuinely missing.
  err_msg="$("${KUBECTL}" get csidriver "$provisioner" 2>&1 || true)"
  if [ -z "$err_msg" ]; then
    err_msg="$("${KUBECTL}" get csidrivers.storage.k8s.io "$provisioner" 2>&1 || true)"
  fi

  # A here-string is used instead of `echo | grep -q`: under `set -o pipefail`
  # an early grep exit could fail the pipeline and hide a real match.
  if grep -qiE 'forbidden|permission|unauthorized|cannot.*get' <<<"$err_msg"; then
    # ${KUBECTL} stays inside the quotes so the whole message reaches log as a
    # single argument even if the path contains whitespace.
    log "$friendly check failed: RBAC insufficient to read CSIDriver $provisioner. ${KUBECTL} said: ${err_msg}"
    exit 2
  fi

  log "$friendly not installed (missing CSIDriver $provisioner). kubectl said: ${err_msg}"
  exit 1
}

# require_crd CRD_NAME [FRIENDLY_NAME]
#   $1 - full CRD name (e.g. "certificates.cert-manager.io")
#   $2 - optional label used in failure messages; defaults to "CRD", which
#        yields the same messages as a one-argument call always produced
# Returns 0 when the CRD can be read. Otherwise the reason is written via log
# and the script exits: status 2 when kubectl's output looks like an
# authorization failure, status 1 when the CRD appears to be missing.
require_crd() {
  local crd="$1"
  local friendly="${2:-CRD}"
  # Declared local so the captured error text does not leak into global scope.
  local err

  # One invocation serves both purposes: its exit status decides success and
  # its combined output is kept for the failure message.
  if err="$("${KUBECTL}" get crd "$crd" 2>&1)"; then
    return 0
  fi

  # A here-string is used instead of `echo | grep -q`: under `set -o pipefail`
  # an early grep exit could fail the pipeline and hide a real match.
  if grep -qiE 'forbidden|permission|unauthorized|cannot.*get' <<<"$err"; then
    # ${KUBECTL} stays inside the quotes so the whole message reaches log as a
    # single argument even if the path contains whitespace.
    log "${friendly} check failed: RBAC insufficient to read $crd. ${KUBECTL} said: ${err}"
    exit 2
  fi

  log "Missing required ${friendly}: $crd. ${KUBECTL} said: ${err}"
  exit 1
}

# Dispatch selected checks
# $CHECKS is intentionally unquoted so it word-splits into individual check
# names. A failing require_* call exits the script itself, so reaching the end
# of the loop means every selected check succeeded.
for c in $CHECKS; do
case "$c" in
drivers)
# Both CSIDriver objects must be present.
require_csidriver "s3.csi.aws.com" "S3 CSI driver"
require_csidriver "fsx.csi.aws.com" "FSx CSI driver"
;;
crds)
# Both cert-manager CRDs must be present.
require_crd "certificaterequests.cert-manager.io" "cert-manager CRD"
require_crd "certificates.cert-manager.io" "cert-manager CRD"
;;
*)
# Any unrecognized check name aborts the run with status 1.
log "Unknown check: $c"
exit 1
;;
esac
done

# Record the success message in the termination log and exit cleanly.
log "Checks passed: $CHECKS"
exit 0
env:
- name: CHECKS
value: "drivers crds"
image: "public.ecr.aws/bitnami/kubectl:1.30"
imagePullPolicy: Always
name: check-csi-drivers
resources: { }
terminationMessagePath: /dev/termination-log
containers:
- command:
- /hyperpod-inference-manager
Expand Down Expand Up @@ -93,7 +181,7 @@ spec:
resources:
limits:
cpu: 500m
memory: 128Mi
memory: 256Mi
requests:
cpu: 10m
memory: 64Mi
Expand Down Expand Up @@ -125,4 +213,4 @@ spec:
volumes:
- name: webhook-certs
secret:
secretName: webhook-server-cert
secretName: webhook-server-cert
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ image:
ap-southeast-4: 311141544681.dkr.ecr.ap-southeast-4.amazonaws.com
ap-southeast-3: 158128612970.dkr.ecr.ap-southeast-3.amazonaws.com
eu-south-2: 025050981094.dkr.ecr.eu-south-2.amazonaws.com
tag: v2.0
tag: v2.1
pullPolicy: Always
repository:
hyperpodClusterArn:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from hyperpod_jumpstart_inference_template.v1_0 import model as v1
from hyperpod_jumpstart_inference_template.v1_0.template import TEMPLATE_CONTENT as v1_template
from hyperpod_jumpstart_inference_template.v1_0 import model as v1_0
from hyperpod_jumpstart_inference_template.v1_1 import model as v1_1
from hyperpod_jumpstart_inference_template.v1_0.template import TEMPLATE_CONTENT as v1_0_template
from hyperpod_jumpstart_inference_template.v1_1.template import TEMPLATE_CONTENT as v1_1_template

SCHEMA_REGISTRY = {
"1.0": v1.FlatHPJumpStartEndpoint,
"1.0": v1_0.FlatHPJumpStartEndpoint,
"1.1": v1_1.FlatHPJumpStartEndpoint,
}

TEMPLATE_REGISTRY = {
"1.0": v1_template
"1.0": v1_0_template,
"1.1": v1_1_template,
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from pydantic import BaseModel, Field, model_validator, ConfigDict
from typing import Optional

# reuse the nested types
from sagemaker.hyperpod.inference.config.hp_jumpstart_endpoint_config import (
Model,
SageMakerEndpoint,
Server,
TlsConfig,
Validations,
)
from sagemaker.hyperpod.inference.hp_jumpstart_endpoint import HPJumpStartEndpoint
from sagemaker.hyperpod.common.config.metadata import Metadata


# Flat, single-level input model for a HyperPod JumpStart endpoint.
#
# Every option is a top-level field validated on its own; ``to_domain``
# assembles the nested ``HPJumpStartEndpoint`` object from them. Unknown keys
# are rejected (``extra="forbid"``).
#
# This overview is deliberately written as comments rather than a class
# docstring: pydantic would publish a class docstring as the model's JSON
# schema description.
class FlatHPJumpStartEndpoint(BaseModel):
    model_config = ConfigDict(extra="forbid")

    # --- Kubernetes object identity -------------------------------------
    namespace: Optional[str] = Field(
        default=None,
        description="Kubernetes namespace",
        min_length=1,
    )

    # --- Model selection ------------------------------------------------
    accept_eula: bool = Field(
        default=False,
        alias="accept_eula",
        description="Whether model terms of use have been accepted",
    )

    metadata_name: Optional[str] = Field(
        default=None,
        alias="metadata_name",
        description="Name of the jumpstart endpoint object",
        max_length=63,
        pattern=r"^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$",
    )

    model_id: str = Field(
        ...,
        alias="model_id",
        description="Unique identifier of the model within the hub",
        min_length=1,
        max_length=63,
        pattern=r"^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$",
    )

    model_version: Optional[str] = Field(
        default=None,
        alias="model_version",
        description="Semantic version of the model to deploy (e.g. 1.0.0)",
        min_length=5,
        max_length=14,
        pattern=r"^\d{1,4}\.\d{1,4}\.\d{1,4}$",
    )

    # --- Inference server -----------------------------------------------
    instance_type: str = Field(
        ...,
        alias="instance_type",
        description="EC2 instance type for the inference server",
        pattern=r"^ml\..*",
    )

    accelerator_partition_type: Optional[str] = Field(
        default=None,
        alias="accelerator_partition_type",
        description="MIG profile to use for GPU partitioning",
        pattern=r"^mig-.*$",
    )

    accelerator_partition_validation: Optional[bool] = Field(
        default=True,
        alias="accelerator_partition_validation",
        description="Enable MIG validation for GPU partitioning. Default is true.",
    )

    # --- SageMaker endpoint and TLS -------------------------------------
    endpoint_name: Optional[str] = Field(
        default=None,
        alias="endpoint_name",
        description="Name of SageMaker endpoint; empty string means no creation",
        max_length=63,
        pattern=r"^[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}$",
    )
    tls_certificate_output_s3_uri: Optional[str] = Field(
        default=None,
        alias="tls_certificate_output_s3_uri",
        description="S3 URI to write the TLS certificate",
        pattern=r"^s3://([^/]+)/?(.*)$",
    )

    @model_validator(mode="after")
    def validate_name(self):
        """Require at least one of ``metadata_name`` and ``endpoint_name``.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if both names are missing or empty.
        """
        if self.metadata_name or self.endpoint_name:
            return self
        raise ValueError("Either metadata_name or endpoint_name must be provided")

    def to_domain(self) -> HPJumpStartEndpoint:
        """Build the nested ``HPJumpStartEndpoint`` from the flat fields.

        When ``metadata_name`` is not set, ``endpoint_name`` is used as the
        object name; that fallback is also stored back on this instance.

        Returns:
            An ``HPJumpStartEndpoint`` populated with metadata, model, server,
            SageMaker endpoint and TLS sections.
        """
        # Fall back to the endpoint name when no explicit object name exists.
        if not self.metadata_name and self.endpoint_name:
            self.metadata_name = self.endpoint_name

        object_meta = Metadata(name=self.metadata_name, namespace=self.namespace)

        model_spec = Model(
            accept_eula=self.accept_eula,
            model_id=self.model_id,
            model_version=self.model_version,
        )

        # The Validations argument is evaluated before Server is called, so
        # the nested objects are still constructed in the same order.
        server_spec = Server(
            instance_type=self.instance_type,
            accelerator_partition_type=self.accelerator_partition_type,
            validations=Validations(
                accelerator_partition_validation=self.accelerator_partition_validation,
            ),
        )

        endpoint_spec = SageMakerEndpoint(name=self.endpoint_name)
        tls_spec = TlsConfig(
            tls_certificate_output_s3_uri=self.tls_certificate_output_s3_uri
        )

        return HPJumpStartEndpoint(
            metadata=object_meta,
            model=model_spec,
            server=server_spec,
            sage_maker_endpoint=endpoint_spec,
            tls_config=tls_spec,
        )
Loading
Loading