[huggingface_pytorch] Inference - update for HuggingFace Transformers to 4.46.1 - Accelerate 1.1.0 - PyTorch 2.3 #4392

Merged on Nov 15, 2024 (23 commits).
58 changes: 58 additions & 0 deletions huggingface/pytorch/inference/buildspec-2-1-0.yml
@@ -0,0 +1,58 @@
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
region: &REGION <set-$REGION-in-environment>
base_framework: &BASE_FRAMEWORK pytorch
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
version: &VERSION 2.1.0
short_version: &SHORT_VERSION "2.1"
contributor: huggingface
arch_type: x86

repository_info:
  inference_repository: &INFERENCE_REPOSITORY
    image_type: &INFERENCE_IMAGE_TYPE inference
    root: !join [ "huggingface/", *BASE_FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
    repository_name: &REPOSITORY_NAME !join ["pr", "-", "huggingface", "-", *BASE_FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE]
    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]

context:
  inference_context: &INFERENCE_CONTEXT
    mms-entrypoint:
      source: ../../build_artifacts/inference/mms-entrypoint.py
      target: mms-entrypoint.py
    config:
      source: ../../build_artifacts/inference/config.properties
      target: config.properties
    deep_learning_container:
      source: ../../../src/deep_learning_container.py
      target: deep_learning_container.py

images:
  BuildHuggingFacePytorchCpuPy310InferenceDockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &HUGGINGFACE_PYTORCH_CPU_INFERENCE_PY3 false
    image_size_baseline: 15000
    device_type: &DEVICE_TYPE cpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py310
    os_version: &OS_VERSION ubuntu22.04
    transformers_version: &TRANSFORMERS_VERSION 4.37.0
    tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *OS_VERSION ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
    context:
      <<: *INFERENCE_CONTEXT
  BuildHuggingFacePytorchGpuPy310Cu118InferenceDockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &HUGGINGFACE_PYTORCH_GPU_INFERENCE_PY3 false
    image_size_baseline: &IMAGE_SIZE_BASELINE 15000
    device_type: &DEVICE_TYPE gpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py310
    cuda_version: &CUDA_VERSION cu118
    os_version: &OS_VERSION ubuntu20.04
    transformers_version: &TRANSFORMERS_VERSION 4.37.0
    tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
                 *CUDA_VERSION, '-', *OS_VERSION ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
                         *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
    context:
      <<: *INFERENCE_CONTEXT
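The buildspec relies on standard YAML anchors and aliases (&NAME / *NAME) plus a custom !join tag that concatenates list items into a single string. Below is a minimal sketch of how such a tag can be registered with PyYAML; the constructor and the sample account/region values are illustrative assumptions, not the repository's actual build code.

import yaml

# !join concatenates the scalars of a YAML sequence into one string,
# resolving aliases such as *ACCOUNT_ID along the way.
def join_constructor(loader, node):
    return "".join(str(item) for item in loader.construct_sequence(node))

yaml.SafeLoader.add_constructor("!join", join_constructor)

doc = '''
account_id: &ACCOUNT_ID "123456789012"
region: &REGION us-west-2
repository_name: &REPOSITORY_NAME pr-huggingface-pytorch-inference
repository: !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
'''
print(yaml.safe_load(doc)["repository"])
# -> 123456789012.dkr.ecr.us-west-2.amazonaws.com/pr-huggingface-pytorch-inference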
18 changes: 9 additions & 9 deletions huggingface/pytorch/inference/buildspec.yml
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 region: &REGION <set-$REGION-in-environment>
 base_framework: &BASE_FRAMEWORK pytorch
 framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
-version: &VERSION 2.1.0
-short_version: &SHORT_VERSION "2.1"
+version: &VERSION 2.3.0
+short_version: &SHORT_VERSION "2.3"
 contributor: huggingface
 arch_type: x86

@@ -27,29 +27,29 @@ context:
       target: deep_learning_container.py

 images:
-  BuildHuggingFacePytorchCpuPy310InferenceDockerImage:
+  BuildHuggingFacePytorchCpuPy311InferenceDockerImage:
     <<: *INFERENCE_REPOSITORY
     build: &HUGGINGFACE_PYTORCH_CPU_INFERENCE_PY3 false
     image_size_baseline: 15000
     device_type: &DEVICE_TYPE cpu
     python_version: &DOCKER_PYTHON_VERSION py3
-    tag_python_version: &TAG_PYTHON_VERSION py310
+    tag_python_version: &TAG_PYTHON_VERSION py311
     os_version: &OS_VERSION ubuntu22.04
-    transformers_version: &TRANSFORMERS_VERSION 4.37.0
+    transformers_version: &TRANSFORMERS_VERSION 4.46.1
     tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *OS_VERSION ]
     docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
     context:
       <<: *INFERENCE_CONTEXT
-  BuildHuggingFacePytorchGpuPy310Cu118InferenceDockerImage:
+  BuildHuggingFacePytorchGpuPy311Cu121InferenceDockerImage:
     <<: *INFERENCE_REPOSITORY
     build: &HUGGINGFACE_PYTORCH_GPU_INFERENCE_PY3 false
     image_size_baseline: &IMAGE_SIZE_BASELINE 15000
     device_type: &DEVICE_TYPE gpu
     python_version: &DOCKER_PYTHON_VERSION py3
-    tag_python_version: &TAG_PYTHON_VERSION py310
-    cuda_version: &CUDA_VERSION cu118
+    tag_python_version: &TAG_PYTHON_VERSION py311
+    cuda_version: &CUDA_VERSION cu121
     os_version: &OS_VERSION ubuntu20.04
-    transformers_version: &TRANSFORMERS_VERSION 4.37.0
+    transformers_version: &TRANSFORMERS_VERSION 4.46.1
     tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
                  *CUDA_VERSION, '-', *OS_VERSION ]
     docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
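For reference, the resolved image tags after this change are 2.3.0-transformers4.46.1-cpu-py311-ubuntu22.04 (CPU) and 2.3.0-transformers4.46.1-gpu-py311-cu121-ubuntu20.04 (GPU). A quick sanity check in Python, mirroring the concatenation order of the GPU `tag:` !join list (plain string joining, as the !join sketch above suggests):

# Each element corresponds to one item of the GPU tag's !join list.
parts = ["2.3.0", "-", "transformers", "4.46.1", "-", "gpu", "-", "py311", "-", "cu121", "-", "ubuntu20.04"]
print("".join(parts))  # -> 2.3.0-transformers4.46.1-gpu-py311-cu121-ubuntu20.04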
249 changes: 249 additions & 0 deletions huggingface/pytorch/inference/docker/2.3/py3/Dockerfile.cpu
@@ -0,0 +1,249 @@
FROM ubuntu:22.04 AS base_image

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true

ARG PYTHON=python3
ARG PYTHON_VERSION=3.11.9
ARG MINIFORGE3_VERSION=23.11.0-0
ARG OPEN_MPI_VERSION=4.1.5
ARG MMS_VERSION=1.1.11

# PyTorch Binaries and versions.
ARG TORCH_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torch-2.3.0%2Bcpu-cp311-cp311-linux_x86_64.whl
ARG TORCHVISION_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torchvision-0.18.0%2Bcpu-cp311-cp311-linux_x86_64.whl
ARG TORCHAUDIO_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torchaudio-2.3.0%2Bcpu-cp311-cp311-linux_x86_64.whl

# HF ARGS
ARG TRANSFORMERS_VERSION
ARG HUGGINGFACE_HUB_VERSION=0.25.1
ARG DIFFUSERS_VERSION=0.31.0
ARG PEFT_VERSION=0.13.2
ARG ACCELERATE_VERSION=1.1.0

# This arg is required to stop the docker build from waiting for region configuration while installing tzdata on Ubuntu
ENV DEBIAN_FRONTEND=noninteractive

# Python won’t try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
ENV PYTHONIOENCODING=UTF-8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV PATH=/opt/conda/bin:$PATH

# Set Env Variables for the images
ENV TEMP=/tmp
ENV MKL_THREADING_LAYER=GNU

ENV DLC_CONTAINER_TYPE=inference

RUN apt-get update \
    && apt-get -y upgrade \
    && apt-get install -y --no-install-recommends \
       software-properties-common \
       build-essential \
       ca-certificates \
       ccache \
       numactl \
       gcc-12 \
       g++-12 \
       make \
       cmake \
       curl \
       emacs \
       git \
       jq \
       libcurl4-openssl-dev \
       libgl1-mesa-glx \
       libglib2.0-0 \
       libsm6 \
       libssl-dev \
       libxext6 \
       libxrender-dev \
       openjdk-17-jdk \
       openssl \
       unzip \
       vim \
       wget \
       libjpeg-dev \
       libpng-dev \
       zlib1g-dev \
       libsndfile1-dev \
       ffmpeg \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 \
    && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 \
    && update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 \
    && update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 \
    && apt-get clean

# Install OpenMPI
RUN wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPEN_MPI_VERSION}.tar.gz \
    && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
    && cd openmpi-$OPEN_MPI_VERSION \
    && ./configure --prefix=/home/.openmpi \
    && make all install \
    && cd .. \
    && rm openmpi-$OPEN_MPI_VERSION.tar.gz \
    && rm -rf openmpi-$OPEN_MPI_VERSION

# The ENV variables declared below depend on the OpenMPI install in the previous section.
# Grouping these ENV variables with those in the first section causes
# ompi_info to fail. This is only observed in CPU containers.
ENV PATH="$PATH:/home/.openmpi/bin"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"
RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value

# Install CondaForge miniconda
RUN curl -L -o ~/miniforge3.sh https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE3_VERSION}/Miniforge3-${MINIFORGE3_VERSION}-Linux-x86_64.sh \
    && chmod +x ~/miniforge3.sh \
    && ~/miniforge3.sh -b -p /opt/conda \
    && rm ~/miniforge3.sh \
    && /opt/conda/bin/conda install -c conda-forge \
       python=${PYTHON_VERSION} \
       cython \
       "mkl<2024.1.0" \
       mkl-include \
       parso \
       scipy \
       typing \
       h5py \
       requests \
       libgcc \
       cmake \
       packaging \
       "awscli<2" \
       boto3 \
       pyyaml \
       conda-content-trust \
       charset-normalizer \
       "idna>=3.7" \
       "tqdm>=4.66.3" \
       "zstandard>=0.22.0" \
    && /opt/conda/bin/conda clean -afy \
    && rm -rf /etc/apt/sources.list.d/*

# symlink pip for OS use
RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
    && ln -s /opt/conda/bin/pip /usr/local/bin/pip3

# Install Common python packages
RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -U \
    opencv-python \
    "pyopenssl>=24.0.0" \
    "cryptography>=42.0.5" \
    "ipython>=8.10.0,<9.0" \
    "awscli<2" \
    "urllib3>=1.26.18,<2" \
    "prompt-toolkit<3.0.39" \
    "setuptools>=70.0.0"

# Ensure PyTorch did not get installed from Conda or pip before this point.
# This is the CPU image, so nvgpu is removed; any NVIDIA installs for the DLC
# come below, and nvidia/cuda packages are removed from pip here.
# Even the GPU image should not carry nvidia or cuda packages in pip.
RUN pip uninstall -y torch torchvision torchaudio multi-model-server

# Install AWS-PyTorch and other torch packages
RUN pip install --no-cache-dir -U \
    enum-compat==0.0.3 \
    "Pillow>=9.0.0" \
    ${TORCH_URL} \
    ${TORCHVISION_URL} \
    ${TORCHAUDIO_URL}

WORKDIR /

RUN pip install --no-cache-dir \
    multi-model-server==$MMS_VERSION \
    sagemaker-inference

# Patches
# py-vuln: 71064
RUN pip install --no-cache-dir -U "requests>=2.32.3"

# add necessary certificate for aws sdk cpp download
RUN mkdir -p /etc/pki/tls/certs && cp /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt

# create user and folders
RUN useradd -m model-server \
    && mkdir -p /home/model-server/tmp /opt/ml/model \
    && chown -R model-server /home/model-server /opt/ml/model

# add MMS entrypoint
COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY config.properties /etc/sagemaker-mms.properties
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

# add telemetry
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
RUN chmod +x /usr/local/bin/deep_learning_container.py

#################################
# Hugging Face specific section #
#################################

# install Hugging Face libraries and their dependencies
RUN pip install --no-cache-dir \
    # hf_transfer will become a built-in feature; remove the extra once it is
    huggingface_hub[hf_transfer]==${HUGGINGFACE_HUB_VERSION} \
    transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \
    diffusers==${DIFFUSERS_VERSION} \
    peft==${PEFT_VERSION} \
    accelerate==${ACCELERATE_VERSION} \
    "protobuf>=3.19.5,<=3.20.2" \
    "sagemaker-huggingface-inference-toolkit==2.4.1"

# hf_transfer will become a built-in feature; remove this env variable once it is
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
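# Illustrative note, not part of the build: with the hf_transfer extra installed
# above and HF_HUB_ENABLE_HF_TRANSFER=1, huggingface_hub transparently routes
# large downloads through the Rust-based hf_transfer backend, e.g. in Python:
#   from huggingface_hub import snapshot_download
#   snapshot_download("distilbert-base-uncased")  # example model id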

#####################
# IPEX installation #
#####################

# Skip the IPEX installation for now due to a compile error ("0.18.0+cpu, the required version for compiling is 0.18.0+cpu...")
# Install IPEX and its dependencies;
# building from source is mandatory for customized AWS PyTorch binaries: https://github.com/intel/intel-extension-for-pytorch/issues/317
# RUN pip install --no-cache-dir intel-openmp tbb pyyaml
# RUN cd /opt/ \
#     && mkdir -p ipex \
#     && cd /opt/ipex \
#     && wget https://github.com/intel/intel-extension-for-pytorch/raw/v2.3.0%2Bcpu/scripts/compile_bundle.sh \
#     && MODE=3 bash compile_bundle.sh \
#     && rm -rf /opt/ipex && cd /opt/

# The IPEX installation installs numpy==1.25.1, which causes a pip check failure due to incompatibility with numba.
# Re-install numpy after the IPEX installation to get the appropriate numpy version and fix pip checks.
# RUN pip install --no-cache-dir \
#     "numpy<1.25" \
#     "pyyaml>=5.4"

RUN HOME_DIR=/root \
    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
    && rm -rf ${HOME_DIR}/oss_compliance*

RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.3/license.txt

## Cleanup ##
RUN pip cache purge \
    && rm -rf /tmp/tmp* \
    && rm -iRf /root/.cache \
    && rm -rf /opt/llvm-project \
    && rm -rf /opt/intel-extension-for-pytorch

EXPOSE 8080 8081
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["serve"]