Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions dlc_developer_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ deep_canary_mode = false
[build]
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
build_frameworks = []
build_frameworks = ["pytorch"]


# By default we build both training and inference containers. Set true/false values to determine which to build.
build_training = true
build_training = false
build_inference = true

# Set do_build to "false" to skip builds and test the latest image built by this PR
Expand Down Expand Up @@ -154,7 +154,7 @@ dlc-pr-tensorflow-2-habana-training = ""
### INFERENCE PR JOBS ###

# Standard Framework Inference
dlc-pr-pytorch-inference = ""
dlc-pr-pytorch-inference = "pytorch/inference/buildspec-2-8-ec2.yml"
dlc-pr-tensorflow-2-inference = ""
dlc-pr-autogluon-inference = ""

Expand Down Expand Up @@ -187,4 +187,4 @@ dlc-pr-tensorflow-2-eia-inference = ""
dlc-pr-vllm = ""

# sglang
dlc-pr-sglang = ""
dlc-pr-sglang = ""
50 changes: 50 additions & 0 deletions pytorch/inference/buildspec-2-8-ec2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Buildspec for the PyTorch 2.8 EC2 GPU inference DLC (Ray Serve based).
# Anchors (&NAME) are referenced below via *NAME; the custom !join tag
# concatenates its arguments into one string (DLC build-system extension).
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK pytorch
version: &VERSION 2.8.0
short_version: &SHORT_VERSION "2.8"
arch_type: x86
# autopatch_build: "True"

# ECR repository coordinates: "pr-" prefixed repo for PR builds in the dev
# account, un-prefixed repo in the prod account for releases.
repository_info:
inference_repository: &INFERENCE_REPOSITORY
image_type: &INFERENCE_IMAGE_TYPE inference
root: !join [ *FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE ]
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE ]
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]

# Files copied into the Docker build context; each entry maps a repo-relative
# source path to its target name inside the context. These must match the
# COPY statements in the Dockerfile.
context:
inference_context: &INFERENCE_CONTEXT
rayserve-entrypoint:
source: docker/build_artifacts/rayserve-entrypoint.py
target: rayserve-entrypoint.py
bash_telemetry:
source: ../../miscellaneous_scripts/bash_telemetry.sh
target: bash_telemetry.sh
setup_oss_compliance:
source: ../../scripts/setup_oss_compliance.sh
target: setup_oss_compliance.sh
deep_learning_container:
source: ../../src/deep_learning_container.py
target: deep_learning_container.py

# One entry per image to build. The tag components below must agree with the
# docker_file path (docker/<short_version>/<python>/<cuda>/Dockerfile.<device>).
images:
BuildEC2GPUPTInferencePy3DockerImage:
<<: *INFERENCE_REPOSITORY
build: &PYTORCH_GPU_INFERENCE_PY3 true
# Max allowed image size (MB) enforced by the DLC size test.
image_size_baseline: 23000
device_type: &DEVICE_TYPE gpu
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py312
cuda_version: &CUDA_VERSION cu129
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
# skip_build: "False"
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
# "ec2" selects the ec2 build stage of the multi-stage Dockerfile.
target: ec2
context:
<<: *INFERENCE_CONTEXT
158 changes: 158 additions & 0 deletions pytorch/inference/docker/2.8/py3/cu129/Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Build-time version pins shared by all stages. A stage must re-declare an
# ARG (after its FROM) to make the value visible inside that stage.
ARG PYTHON=python3
ARG PYTHON_VERSION=3.12.10
ARG PYTHON_SHORT_VERSION=3.12
ARG PYTORCH_VERSION=2.8.0
ARG TORCHAUDIO_VERSION=2.8.0
ARG TORCHVISION_VERSION=0.23.0
ARG RAY_VERSION=2.49.0

#################################################################
# ____
# / ___| ___ _ __ ___ _ __ ___ ___ _ __
# | | / _ \| '_ ` _ \| '_ ` _ \ / _ \| '_ \
# | |___ (_) | | | | | | | | | | | (_) | | | |
# \____|\___/|_| |_| |_|_| |_| |_|\___/|_| |_|
# ___ ____ _
# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___
# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \
# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/
# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___|
# |___/ |_|
#################################################################

# Base stage shared by all image flavors: CUDA 12.9 runtime + cuDNN on
# Ubuntu 22.04, matching the cu129 PyTorch wheels installed below.
FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu22.04 AS common

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Re-declare top-level ARGs so they are visible inside this stage.
ARG PYTHON
ARG PYTHON_VERSION
ARG PYTORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHVISION_VERSION
ARG RAY_VERSION

ENV CUDA_HOME="/usr/local/cuda"
ENV PATH="${CUDA_HOME}/bin:${PATH}"

# Python won't try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# Consumed by DLC telemetry/test tooling to tell inference from training images.
ENV DLC_CONTAINER_TYPE=inference
WORKDIR /

# Install OS build/runtime dependencies in one layer. systemd is upgraded
# explicitly to pick up security patches; the -dev packages (zlib, ssl, ffi,
# bz2, lzma, sqlite, readline) are needed to compile CPython from source
# below. apt lists are removed to keep the layer small.
RUN apt-get update \
&& apt-get -y upgrade --only-upgrade systemd \
&& apt-get install -y --allow-change-held-packages --no-install-recommends \
libgl1-mesa-glx \
build-essential \
ca-certificates \
zlib1g-dev \
openssl \
libssl-dev \
pkg-config \
check \
llvm \
xz-utils \
curl \
wget \
unzip \
libffi-dev \
libbz2-dev \
liblzma-dev \
libsqlite3-dev \
libreadline-dev \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

# Build and install CPython ${PYTHON_VERSION} from source; the script comes
# from the Docker build context and is deleted afterwards to keep the image lean.
COPY install_python.sh install_python.sh
RUN bash install_python.sh ${PYTHON_VERSION} && rm install_python.sh

# Python Path - nvidia/cuda base already has CUDA paths configured
ENV PATH="/usr/local/bin:${PATH}"

# Install PyTorch first (needs specific index-url)
# torch/vision/audio come from the official cu129 wheel index so the CUDA
# build variant matches the 12.9 base image.
# NOTE(review): torch 2.8.0 GPU wheels pin their own triton (3.4.x);
# force-installing triton==3.2.0 afterwards may downgrade it and break
# torch.compile — confirm this pin is intentional.
# The legacy "dataclasses" backport shadows the stdlib module on py>=3.7,
# so it is removed in case a transitive dependency pulled it in.
RUN pip install --no-cache-dir \
torch==${PYTORCH_VERSION} \
torchvision==${TORCHVISION_VERSION} \
torchaudio==${TORCHAUDIO_VERSION} \
--index-url https://download.pytorch.org/whl/cu129 \
&& pip install --no-cache-dir "triton==3.2.0" \
&& pip uninstall -y dataclasses || true

# Install Ray Serve plus common inference dependencies in one layer.
# Specifiers containing ">" MUST be quoted: an unquoted `pkg>=1.2` is parsed
# by the shell as an output redirection (`pkg > =1.2`), silently dropping the
# requirement — jinja2 and tornado were previously lost this way.
# "ray[serve]" is quoted too so the brackets are never glob-expanded.
# The >= floors are security-driven minimum versions.
RUN pip install --no-cache-dir \
    "ray[serve]==${RAY_VERSION}" \
    cryptography \
    pyOpenSSL \
    mkl \
    charset-normalizer \
    packaging \
    PyYAML \
    numpy \
    scipy \
    click \
    psutil \
    pillow \
    h5py \
    fsspec \
    "idna>=3.7" \
    "tqdm>=4.66.3" \
    "requests>=2.32.0" \
    "setuptools>=70.0.0" \
    "urllib3>=2.5.0" \
    opencv-python==4.11.0.86 \
    "jinja2>=3.1.6" \
    "tornado>=6.5.1"

# Fetch the third-party license file that must ship in released DLC images.
RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt

# Remove pip/root caches: they are not needed at runtime and removing them is
# required for image security verification. "|| true" (not a pipe) keeps the
# build going if the cache directory is absent.
RUN rm -rf /root/.cache || true

########################################################
# _____ ____ ____ ___
# | ____/ ___|___ \ |_ _|_ __ ___ __ _ __ _ ___
# | _|| | __) | | || '_ ` _ \ / _` |/ _` |/ _ \
# | |__| |___ / __/ | || | | | | | (_| | (_| | __/
# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___|
# |___/
# ____ _
# | _ \ ___ ___(_)_ __ ___
# | |_) / _ \/ __| | '_ \ / _ \
# | _ < __/ (__| | |_) | __/
# |_| \_\___|\___|_| .__/ \___|
# |_|
########################################################

# EC2 flavor of the image: adds OSS-compliance artifacts and the Ray Serve
# entrypoint on top of the common stage.
FROM common AS ec2

ARG PYTHON

WORKDIR /

# Generate OSS compliance artifacts (script comes from the build context and
# is removed once it has run).
COPY setup_oss_compliance.sh setup_oss_compliance.sh
RUN bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh

# This arg required to stop docker build waiting for region configuration while installing tz data from ubuntu 20
ENV DEBIAN_FRONTEND=noninteractive

# Final OS security-patch pass; lists removed again to keep the layer small.
RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get autoremove -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Entrypoint starts a Ray head node + Ray Serve, then runs any user command.
COPY rayserve-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

# Port 8000: Ray Serve default serving port, Port 8265: Ray dashboard
# References : https://docs.ray.io/en/latest/serve/production-guide/config.html
# https://docs.ray.io/en/latest/ray-observability/getting-started.html
EXPOSE 8000 8265
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
39 changes: 39 additions & 0 deletions pytorch/inference/docker/build_artifacts/rayserve-entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
# Container entrypoint: boot a single-node Ray cluster plus Ray Serve, then
# hand off to the user-supplied command (or idle to keep the container alive).
from __future__ import absolute_import

import shlex
import subprocess
import sys

# Auto-start Ray cluster and Ray Serve
# Start this container as a Ray head node. Usage-stat reporting is disabled;
# the dashboard is bound to all interfaces on 8265 so it is reachable from
# outside the container. check=True aborts the entrypoint if startup fails.
subprocess.run(
[
"ray",
"start",
"--head",
"--disable-usage-stats",
"--dashboard-host",
"0.0.0.0",
"--dashboard-port",
"8265",
],
check=True,
)

# Start Ray Serve's HTTP proxy on all interfaces, port 8000 (matches EXPOSE).
subprocess.run(["serve", "start", "--http-host", "0.0.0.0", "--http-port", "8000"], check=True)

# If a command was passed to `docker run`, execute it: argv is re-joined and
# shlex-split so a single quoted command string (e.g. "serve run app.yaml")
# is tokenized shell-style. NOTE(review): this round-trip re-splits arguments
# that themselves contain spaces — confirm callers never rely on such args.
# check_call propagates the command's exit code via CalledProcessError.
if len(sys.argv) > 1:
subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))
else:
# No command given: block forever so the container (and Ray) stays up.
subprocess.call(["tail", "-f", "/dev/null"])