Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions dlc_developer_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ deep_canary_mode = false
[build]
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
build_frameworks = []
build_frameworks = ["pytorch"]


# By default we build both training and inference containers. Set true/false values to determine which to build.
build_training = true
build_training = false
build_inference = true

# Set do_build to "false" to skip builds and test the latest image built by this PR
Expand Down Expand Up @@ -154,7 +154,7 @@ dlc-pr-tensorflow-2-habana-training = ""
### INFERENCE PR JOBS ###

# Standard Framework Inference
dlc-pr-pytorch-inference = ""
dlc-pr-pytorch-inference = "pytorch/inference/buildspec-2-8-ec2.yml"
dlc-pr-tensorflow-2-inference = ""
dlc-pr-autogluon-inference = ""

Expand Down Expand Up @@ -187,4 +187,4 @@ dlc-pr-tensorflow-2-eia-inference = ""
dlc-pr-vllm = ""

# sglang
dlc-pr-sglang = ""
dlc-pr-sglang = ""
50 changes: 50 additions & 0 deletions pytorch/inference/buildspec-2-8-ec2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Buildspec for the PyTorch 2.8 EC2 GPU inference DLC (Ray Serve based).
# Anchors (&NAME) are referenced below via *NAME; the custom !join tag
# concatenates its arguments into one string (DLC build-system extension).
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK pytorch
version: &VERSION 2.8.0
short_version: &SHORT_VERSION "2.8"
arch_type: x86
# autopatch_build: "True"

# ECR repository coordinates: "pr-" prefixed repo for PR builds in the dev
# account, un-prefixed repo in the prod account for releases.
repository_info:
inference_repository: &INFERENCE_REPOSITORY
image_type: &INFERENCE_IMAGE_TYPE inference
root: !join [ *FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE ]
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE ]
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]

# Files copied into the Docker build context; each entry maps a repo-relative
# source path to its target name inside the context. These must match the
# COPY statements in the Dockerfile.
context:
inference_context: &INFERENCE_CONTEXT
rayserve-entrypoint:
source: docker/build_artifacts/rayserve-entrypoint.py
target: rayserve-entrypoint.py
bash_telemetry:
source: ../../miscellaneous_scripts/bash_telemetry.sh
target: bash_telemetry.sh
setup_oss_compliance:
source: ../../scripts/setup_oss_compliance.sh
target: setup_oss_compliance.sh
deep_learning_container:
source: ../../src/deep_learning_container.py
target: deep_learning_container.py

# One entry per image to build. The tag components below must agree with the
# docker_file path (docker/<short_version>/<python>/<cuda>/Dockerfile.<device>).
images:
BuildEC2GPUPTInferencePy3DockerImage:
<<: *INFERENCE_REPOSITORY
build: &PYTORCH_GPU_INFERENCE_PY3 true
# Max allowed image size (MB) enforced by the DLC size test.
image_size_baseline: 23000
device_type: &DEVICE_TYPE gpu
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py312
cuda_version: &CUDA_VERSION cu129
os_version: &OS_VERSION ubuntu22.04
tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
# skip_build: "False"
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
# "ec2" selects the ec2 build stage of the multi-stage Dockerfile.
target: ec2
context:
<<: *INFERENCE_CONTEXT
158 changes: 158 additions & 0 deletions pytorch/inference/docker/2.8/py3/cu129/Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Build-time version pins shared by all stages. A stage must re-declare an
# ARG (after its FROM) to make the value visible inside that stage.
ARG PYTHON=python3
ARG PYTHON_VERSION=3.12.10
ARG PYTHON_SHORT_VERSION=3.12
ARG PYTORCH_VERSION=2.8.0
ARG TORCHAUDIO_VERSION=2.8.0
ARG TORCHVISION_VERSION=0.23.0
ARG RAY_VERSION=2.49.0

#################################################################
# ____
# / ___| ___ _ __ ___ _ __ ___ ___ _ __
# | | / _ \| '_ ` _ \| '_ ` _ \ / _ \| '_ \
# | |___ (_) | | | | | | | | | | | (_) | | | |
# \____|\___/|_| |_| |_|_| |_| |_|\___/|_| |_|
# ___ ____ _
# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___
# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \
# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/
# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___|
# |___/ |_|
#################################################################

# Base stage shared by all image flavors: CUDA 12.9 runtime + cuDNN on
# Ubuntu 22.04, matching the cu129 PyTorch wheels installed below.
FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu22.04 AS common

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Re-declare top-level ARGs so they are visible inside this stage.
ARG PYTHON
ARG PYTHON_VERSION
ARG PYTORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHVISION_VERSION
ARG RAY_VERSION

ENV CUDA_HOME="/usr/local/cuda"
ENV PATH="${CUDA_HOME}/bin:${PATH}"

# Python won't try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# Consumed by DLC telemetry/test tooling to tell inference from training images.
ENV DLC_CONTAINER_TYPE=inference
WORKDIR /

# Install OS build/runtime dependencies in one layer. systemd is upgraded
# explicitly to pick up security patches; the -dev packages (zlib, ssl, ffi,
# bz2, lzma, sqlite, readline) are needed to compile CPython from source
# below. apt lists are removed to keep the layer small.
RUN apt-get update \
&& apt-get -y upgrade --only-upgrade systemd \
&& apt-get install -y --allow-change-held-packages --no-install-recommends \
libgl1-mesa-glx \
build-essential \
ca-certificates \
zlib1g-dev \
openssl \
libssl-dev \
pkg-config \
check \
llvm \
xz-utils \
curl \
wget \
unzip \
libffi-dev \
libbz2-dev \
liblzma-dev \
libsqlite3-dev \
libreadline-dev \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

# Build and install CPython ${PYTHON_VERSION} from source; the script comes
# from the Docker build context and is deleted afterwards to keep the image lean.
COPY install_python.sh install_python.sh
RUN bash install_python.sh ${PYTHON_VERSION} && rm install_python.sh

# Python Path - nvidia/cuda base already has CUDA paths configured
ENV PATH="/usr/local/bin:${PATH}"

# Install PyTorch first (needs specific index-url)
# torch/vision/audio come from the official cu129 wheel index so the CUDA
# build variant matches the 12.9 base image.
# NOTE(review): torch 2.8.0 GPU wheels pin their own triton (3.4.x);
# force-installing triton==3.2.0 afterwards may downgrade it and break
# torch.compile — confirm this pin is intentional.
# The legacy "dataclasses" backport shadows the stdlib module on py>=3.7,
# so it is removed in case a transitive dependency pulled it in.
RUN pip install --no-cache-dir \
torch==${PYTORCH_VERSION} \
torchvision==${TORCHVISION_VERSION} \
torchaudio==${TORCHAUDIO_VERSION} \
--index-url https://download.pytorch.org/whl/cu129 \
&& pip install --no-cache-dir "triton==3.2.0" \
&& pip uninstall -y dataclasses || true

# Install Ray Serve plus common inference dependencies in one layer.
# Specifiers containing ">" MUST be quoted: an unquoted `pkg>=1.2` is parsed
# by the shell as an output redirection (`pkg > =1.2`), silently dropping the
# requirement — jinja2 and tornado were previously lost this way.
# "ray[serve]" is quoted too so the brackets are never glob-expanded.
# The >= floors are security-driven minimum versions.
RUN pip install --no-cache-dir \
    "ray[serve]==${RAY_VERSION}" \
    cryptography \
    pyOpenSSL \
    mkl \
    charset-normalizer \
    packaging \
    PyYAML \
    numpy \
    scipy \
    click \
    psutil \
    pillow \
    h5py \
    fsspec \
    "idna>=3.7" \
    "tqdm>=4.66.3" \
    "requests>=2.32.0" \
    "setuptools>=70.0.0" \
    "urllib3>=2.5.0" \
    opencv-python==4.11.0.86 \
    "jinja2>=3.1.6" \
    "tornado>=6.5.1"

# Fetch the third-party license file that must ship in released DLC images.
RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt

# Remove pip/root caches: they are not needed at runtime and removing them is
# required for image security verification. "|| true" (not a pipe) keeps the
# build going if the cache directory is absent.
RUN rm -rf /root/.cache || true

########################################################
# _____ ____ ____ ___
# | ____/ ___|___ \ |_ _|_ __ ___ __ _ __ _ ___
# | _|| | __) | | || '_ ` _ \ / _` |/ _` |/ _ \
# | |__| |___ / __/ | || | | | | | (_| | (_| | __/
# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___|
# |___/
# ____ _
# | _ \ ___ ___(_)_ __ ___
# | |_) / _ \/ __| | '_ \ / _ \
# | _ < __/ (__| | |_) | __/
# |_| \_\___|\___|_| .__/ \___|
# |_|
########################################################

# EC2 flavor of the image: adds OSS-compliance artifacts and the Ray Serve
# entrypoint on top of the common stage.
FROM common AS ec2

ARG PYTHON

WORKDIR /

# Generate OSS compliance artifacts (script comes from the build context and
# is removed once it has run).
COPY setup_oss_compliance.sh setup_oss_compliance.sh
RUN bash setup_oss_compliance.sh ${PYTHON} && rm setup_oss_compliance.sh

# This arg required to stop docker build waiting for region configuration while installing tz data from ubuntu 20
ENV DEBIAN_FRONTEND=noninteractive

# Final OS security-patch pass; lists removed again to keep the layer small.
RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get autoremove -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Entrypoint starts a Ray head node + Ray Serve, then runs any user command.
COPY rayserve-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

# Port 8000: Ray Serve default serving port, Port 8265: Ray dashboard
# References : https://docs.ray.io/en/latest/serve/production-guide/config.html
# https://docs.ray.io/en/latest/ray-observability/getting-started.html
EXPOSE 8000 8265
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
39 changes: 39 additions & 0 deletions pytorch/inference/docker/build_artifacts/rayserve-entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2019-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
# Container entrypoint: boot a single-node Ray cluster plus Ray Serve, then
# hand off to the user-supplied command (or idle to keep the container alive).
from __future__ import absolute_import

import shlex
import subprocess
import sys

# Auto-start Ray cluster and Ray Serve
# Start this container as a Ray head node. Usage-stat reporting is disabled;
# the dashboard is bound to all interfaces on 8265 so it is reachable from
# outside the container. check=True aborts the entrypoint if startup fails.
subprocess.run(
[
"ray",
"start",
"--head",
"--disable-usage-stats",
"--dashboard-host",
"0.0.0.0",
"--dashboard-port",
"8265",
],
check=True,
)

# Start Ray Serve's HTTP proxy on all interfaces, port 8000 (matches EXPOSE).
subprocess.run(["serve", "start", "--http-host", "0.0.0.0", "--http-port", "8000"], check=True)

# If a command was passed to `docker run`, execute it: argv is re-joined and
# shlex-split so a single quoted command string (e.g. "serve run app.yaml")
# is tokenized shell-style. NOTE(review): this round-trip re-splits arguments
# that themselves contain spaces — confirm callers never rely on such args.
# check_call propagates the command's exit code via CalledProcessError.
if len(sys.argv) > 1:
subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))
else:
# No command given: block forever so the container (and Ray) stays up.
subprocess.call(["tail", "-f", "/dev/null"])