[huggingface_pytorch] Inference - update for HuggingFace Transformers to 4.46.1 - Accelerate 1.1.0 - PyTorch 2.3 #4392

Merged on Nov 15, 2024 (23 commits).
58 changes: 58 additions & 0 deletions huggingface/pytorch/inference/buildspec-2-1-0.yml
@@ -0,0 +1,58 @@
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
region: &REGION <set-$REGION-in-environment>
base_framework: &BASE_FRAMEWORK pytorch
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
version: &VERSION 2.1.0
short_version: &SHORT_VERSION "2.1"
contributor: huggingface
arch_type: x86

repository_info:
  inference_repository: &INFERENCE_REPOSITORY
    image_type: &INFERENCE_IMAGE_TYPE inference
    root: !join [ "huggingface/", *BASE_FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
    repository_name: &REPOSITORY_NAME !join ["pr", "-", "huggingface", "-", *BASE_FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE]
    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]

context:
  inference_context: &INFERENCE_CONTEXT
    mms-entrypoint:
      source: ../../build_artifacts/inference/mms-entrypoint.py
      target: mms-entrypoint.py
    config:
      source: ../../build_artifacts/inference/config.properties
      target: config.properties
    deep_learning_container:
      source: ../../../src/deep_learning_container.py
      target: deep_learning_container.py

images:
  BuildHuggingFacePytorchCpuPy310InferenceDockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &HUGGINGFACE_PYTORCH_CPU_INFERENCE_PY3 false
    image_size_baseline: 15000
    device_type: &DEVICE_TYPE cpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py310
    os_version: &OS_VERSION ubuntu22.04
    transformers_version: &TRANSFORMERS_VERSION 4.37.0
    tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *OS_VERSION ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
    context:
      <<: *INFERENCE_CONTEXT
  BuildHuggingFacePytorchGpuPy310Cu118InferenceDockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &HUGGINGFACE_PYTORCH_GPU_INFERENCE_PY3 false
    image_size_baseline: &IMAGE_SIZE_BASELINE 15000
    device_type: &DEVICE_TYPE gpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py310
    cuda_version: &CUDA_VERSION cu118
    os_version: &OS_VERSION ubuntu20.04
    transformers_version: &TRANSFORMERS_VERSION 4.37.0
    tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
                 *CUDA_VERSION, '-', *OS_VERSION ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
                         *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
    context:
      <<: *INFERENCE_CONTEXT
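The buildspec relies on standard YAML anchors and aliases (&NAME / *NAME) plus a custom !join tag that concatenates list items into a single string. Below is a minimal sketch of how such a tag can be registered with PyYAML; the constructor and the sample account/region values are illustrative assumptions, not the repository's actual build code.

import yaml

# !join concatenates the scalars of a YAML sequence into one string,
# resolving aliases such as *ACCOUNT_ID along the way.
def join_constructor(loader, node):
    return "".join(str(item) for item in loader.construct_sequence(node))

yaml.SafeLoader.add_constructor("!join", join_constructor)

doc = '''
account_id: &ACCOUNT_ID "123456789012"
region: &REGION us-west-2
repository_name: &REPOSITORY_NAME pr-huggingface-pytorch-inference
repository: !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
'''
print(yaml.safe_load(doc)["repository"])
# -> 123456789012.dkr.ecr.us-west-2.amazonaws.com/pr-huggingface-pytorch-inference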
18 changes: 9 additions & 9 deletions huggingface/pytorch/inference/buildspec.yml
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 region: &REGION <set-$REGION-in-environment>
 base_framework: &BASE_FRAMEWORK pytorch
 framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
-version: &VERSION 2.1.0
-short_version: &SHORT_VERSION "2.1"
+version: &VERSION 2.3.0
+short_version: &SHORT_VERSION "2.3"
 contributor: huggingface
 arch_type: x86

@@ -27,29 +27,29 @@ context:
       target: deep_learning_container.py

 images:
-  BuildHuggingFacePytorchCpuPy310InferenceDockerImage:
+  BuildHuggingFacePytorchCpuPy311InferenceDockerImage:
     <<: *INFERENCE_REPOSITORY
     build: &HUGGINGFACE_PYTORCH_CPU_INFERENCE_PY3 false
     image_size_baseline: 15000
     device_type: &DEVICE_TYPE cpu
     python_version: &DOCKER_PYTHON_VERSION py3
-    tag_python_version: &TAG_PYTHON_VERSION py310
+    tag_python_version: &TAG_PYTHON_VERSION py311
     os_version: &OS_VERSION ubuntu22.04
-    transformers_version: &TRANSFORMERS_VERSION 4.37.0
+    transformers_version: &TRANSFORMERS_VERSION 4.46.1
     tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *OS_VERSION ]
     docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
     context:
       <<: *INFERENCE_CONTEXT
-  BuildHuggingFacePytorchGpuPy310Cu118InferenceDockerImage:
+  BuildHuggingFacePytorchGpuPy311Cu121InferenceDockerImage:
     <<: *INFERENCE_REPOSITORY
     build: &HUGGINGFACE_PYTORCH_GPU_INFERENCE_PY3 false
     image_size_baseline: &IMAGE_SIZE_BASELINE 15000
     device_type: &DEVICE_TYPE gpu
     python_version: &DOCKER_PYTHON_VERSION py3
-    tag_python_version: &TAG_PYTHON_VERSION py310
-    cuda_version: &CUDA_VERSION cu118
+    tag_python_version: &TAG_PYTHON_VERSION py311
+    cuda_version: &CUDA_VERSION cu121
     os_version: &OS_VERSION ubuntu20.04
-    transformers_version: &TRANSFORMERS_VERSION 4.37.0
+    transformers_version: &TRANSFORMERS_VERSION 4.46.1
     tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
                  *CUDA_VERSION, '-', *OS_VERSION ]
     docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
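For reference, the resolved image tags after this change are 2.3.0-transformers4.46.1-cpu-py311-ubuntu22.04 (CPU) and 2.3.0-transformers4.46.1-gpu-py311-cu121-ubuntu20.04 (GPU). A quick sanity check in Python, mirroring the concatenation order of the GPU `tag:` !join list (plain string joining, as the !join sketch above suggests):

# Each element corresponds to one item of the GPU tag's !join list.
parts = ["2.3.0", "-", "transformers", "4.46.1", "-", "gpu", "-", "py311", "-", "cu121", "-", "ubuntu20.04"]
print("".join(parts))  # -> 2.3.0-transformers4.46.1-gpu-py311-cu121-ubuntu20.04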
249 changes: 249 additions & 0 deletions huggingface/pytorch/inference/docker/2.3/py3/Dockerfile.cpu
@@ -0,0 +1,249 @@
FROM ubuntu:22.04 AS base_image

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true

ARG PYTHON=python3
ARG PYTHON_VERSION=3.11.9
ARG MINIFORGE3_VERSION=23.11.0-0
ARG OPEN_MPI_VERSION=4.1.5
ARG MMS_VERSION=1.1.11

# PyTorch Binaries and versions.
ARG TORCH_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torch-2.3.0%2Bcpu-cp311-cp311-linux_x86_64.whl
ARG TORCHVISION_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torchvision-0.18.0%2Bcpu-cp311-cp311-linux_x86_64.whl
ARG TORCHAUDIO_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/pytorch/v2.3.0/cpu/torchaudio-2.3.0%2Bcpu-cp311-cp311-linux_x86_64.whl

# HF ARGS
ARG TRANSFORMERS_VERSION
ARG HUGGINGFACE_HUB_VERSION=0.25.1
ARG DIFFUSERS_VERSION=0.31.0
ARG PEFT_VERSION=0.13.2
ARG ACCELERATE_VERSION=1.1.0

# This arg is required to stop the docker build from waiting for region configuration while installing tzdata on Ubuntu
ENV DEBIAN_FRONTEND=noninteractive

# Python won’t try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}"
ENV PYTHONIOENCODING=UTF-8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV PATH=/opt/conda/bin:$PATH

# Set Env Variables for the images
ENV TEMP=/tmp
ENV MKL_THREADING_LAYER=GNU

ENV DLC_CONTAINER_TYPE=inference

RUN apt-get update \
    && apt-get -y upgrade \
    && apt-get install -y --no-install-recommends \
       software-properties-common \
       build-essential \
       ca-certificates \
       ccache \
       numactl \
       gcc-12 \
       g++-12 \
       make \
       cmake \
       curl \
       emacs \
       git \
       jq \
       libcurl4-openssl-dev \
       libgl1-mesa-glx \
       libglib2.0-0 \
       libsm6 \
       libssl-dev \
       libxext6 \
       libxrender-dev \
       openjdk-17-jdk \
       openssl \
       unzip \
       vim \
       wget \
       libjpeg-dev \
       libpng-dev \
       zlib1g-dev \
       libsndfile1-dev \
       ffmpeg \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 \
    && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 \
    && update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 \
    && update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 \
    && apt-get clean

# Install OpenMPI
RUN wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPEN_MPI_VERSION}.tar.gz \
    && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
    && cd openmpi-$OPEN_MPI_VERSION \
    && ./configure --prefix=/home/.openmpi \
    && make all install \
    && cd .. \
    && rm openmpi-$OPEN_MPI_VERSION.tar.gz \
    && rm -rf openmpi-$OPEN_MPI_VERSION

# The ENV variables declared below depend on the OpenMPI install in the previous section.
# Grouping these ENV variables with those in the first section causes
# ompi_info to fail. This is only observed in CPU containers.
ENV PATH="$PATH:/home/.openmpi/bin"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"
RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value

# Install CondaForge miniconda
RUN curl -L -o ~/miniforge3.sh https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE3_VERSION}/Miniforge3-${MINIFORGE3_VERSION}-Linux-x86_64.sh \
    && chmod +x ~/miniforge3.sh \
    && ~/miniforge3.sh -b -p /opt/conda \
    && rm ~/miniforge3.sh \
    && /opt/conda/bin/conda install -c conda-forge \
       python=${PYTHON_VERSION} \
       cython \
       "mkl<2024.1.0" \
       mkl-include \
       parso \
       scipy \
       typing \
       h5py \
       requests \
       libgcc \
       cmake \
       packaging \
       "awscli<2" \
       boto3 \
       pyyaml \
       conda-content-trust \
       charset-normalizer \
       "idna>=3.7" \
       "tqdm>=4.66.3" \
       "zstandard>=0.22.0" \
    && /opt/conda/bin/conda clean -afy \
    && rm -rf /etc/apt/sources.list.d/*

# symlink pip for OS use
RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
    && ln -s /opt/conda/bin/pip /usr/local/bin/pip3

# Install Common python packages
RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -U \
    opencv-python \
    "pyopenssl>=24.0.0" \
    "cryptography>=42.0.5" \
    "ipython>=8.10.0,<9.0" \
    "awscli<2" \
    "urllib3>=1.26.18,<2" \
    "prompt-toolkit<3.0.39" \
    "setuptools>=70.0.0"

# Ensure PyTorch did not get installed from Conda or pip before this point.
# This is the CPU image, so nvgpu is removed; any NVIDIA installs for the DLC
# come below, and nvidia/cuda packages are removed from pip here.
# Even the GPU image should not carry nvidia or cuda packages in pip.
RUN pip uninstall -y torch torchvision torchaudio multi-model-server

# Install AWS-PyTorch and other torch packages
RUN pip install --no-cache-dir -U \
    enum-compat==0.0.3 \
    "Pillow>=9.0.0" \
    ${TORCH_URL} \
    ${TORCHVISION_URL} \
    ${TORCHAUDIO_URL}

WORKDIR /

RUN pip install --no-cache-dir \
    multi-model-server==$MMS_VERSION \
    sagemaker-inference

# Patches
# py-vuln: 71064
RUN pip install --no-cache-dir -U "requests>=2.32.3"

# add necessary certificate for aws sdk cpp download
RUN mkdir -p /etc/pki/tls/certs && cp /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt

# create user and folders
RUN useradd -m model-server \
    && mkdir -p /home/model-server/tmp /opt/ml/model \
    && chown -R model-server /home/model-server /opt/ml/model

# add MMS entrypoint
COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY config.properties /etc/sagemaker-mms.properties
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

# add telemetry
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
RUN chmod +x /usr/local/bin/deep_learning_container.py

#################################
# Hugging Face specific section #
#################################

# install Hugging Face libraries and their dependencies
RUN pip install --no-cache-dir \
    # hf_transfer will become a built-in feature; remove the extra once it is
    huggingface_hub[hf_transfer]==${HUGGINGFACE_HUB_VERSION} \
    transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION} \
    diffusers==${DIFFUSERS_VERSION} \
    peft==${PEFT_VERSION} \
    accelerate==${ACCELERATE_VERSION} \
    "protobuf>=3.19.5,<=3.20.2" \
    "sagemaker-huggingface-inference-toolkit==2.4.1"

# hf_transfer will become a built-in feature; remove this env variable once it is
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
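# Illustrative note, not part of the build: with the hf_transfer extra installed
# above and HF_HUB_ENABLE_HF_TRANSFER=1, huggingface_hub transparently routes
# large downloads through the Rust-based hf_transfer backend, e.g. in Python:
#   from huggingface_hub import snapshot_download
#   snapshot_download("distilbert-base-uncased")  # example model id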

#####################
# IPEX installation #
#####################

# Skip the IPEX installation for now due to a compile error ("0.18.0+cpu, the required version for compiling is 0.18.0+cpu...")
# Install IPEX and its dependencies;
# building from source is mandatory for customized AWS PyTorch binaries: https://github.com/intel/intel-extension-for-pytorch/issues/317
# RUN pip install --no-cache-dir intel-openmp tbb pyyaml
# RUN cd /opt/ \
#     && mkdir -p ipex \
#     && cd /opt/ipex \
#     && wget https://github.com/intel/intel-extension-for-pytorch/raw/v2.3.0%2Bcpu/scripts/compile_bundle.sh \
#     && MODE=3 bash compile_bundle.sh \
#     && rm -rf /opt/ipex && cd /opt/

# The IPEX installation installs numpy==1.25.1, which causes a pip check failure due to incompatibility with numba.
# Re-install numpy after the IPEX installation to get the appropriate numpy version and fix pip checks.
# RUN pip install --no-cache-dir \
#     "numpy<1.25" \
#     "pyyaml>=5.4"

RUN HOME_DIR=/root \
    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
    && rm -rf ${HOME_DIR}/oss_compliance*

RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.3/license.txt

## Cleanup ##
RUN pip cache purge \
    && rm -rf /tmp/tmp* \
    && rm -iRf /root/.cache \
    && rm -rf /opt/llvm-project \
    && rm -rf /opt/intel-extension-for-pytorch

EXPOSE 8080 8081
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["serve"]