Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HuggingFace][Neuronx] Inference - Optimum Neuron 0.0.25 - Neuron sdk 2.20.0 - Transformers to 4.43.2 #4308

Merged
merged 14 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions dlc_developer_config.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[dev]
# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
partner_developer = ""
partner_developer = "huggingface"
# Please only set it to true if you are preparing an EI related PR
# Do remember to revert it back to false before merging any PR (including EI dedicated PR)
ei_mode = false
Expand All @@ -9,7 +9,7 @@ ei_mode = false
neuron_mode = false
# Please only set it to true if you are preparing a NEURONX related PR
# Do remember to revert it back to false before merging any PR (including NEURONX dedicated PR)
neuronx_mode = false
neuronx_mode = true
# Please only set it to true if you are preparing a GRAVITON related PR
# Do remember to revert it back to false before merging any PR (including GRAVITON dedicated PR)
graviton_mode = false
Expand All @@ -34,10 +34,10 @@ deep_canary_mode = false
[build]
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
# available frameworks - ["autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "mxnet", "pytorch", "stabilityai_pytorch"]
build_frameworks = []
build_frameworks = ["huggingface_pytorch"]

# By default we build both training and inference containers. Set true/false values to determine which to build.
build_training = true
build_training = false
build_inference = true

# Set do_build to "false" to skip builds and test the latest image built by this PR
Expand Down
4 changes: 2 additions & 2 deletions huggingface/pytorch/inference/buildspec-neuronx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ images:
device_type: &DEVICE_TYPE neuronx
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py310
neuron_sdk_version: &NEURON_SDK_VERSION sdk2.19.1
neuron_sdk_version: &NEURON_SDK_VERSION sdk2.20.0
os_version: &OS_VERSION ubuntu20.04
transformers_version: &TRANSFORMERS_VERSION 4.41.1
transformers_version: &TRANSFORMERS_VERSION 4.43.2
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION,"-", *NEURON_SDK_VERSION, '-', *OS_VERSION ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *NEURON_SDK_VERSION, /Dockerfile., *DEVICE_TYPE ]
context:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Base operating system layer for this image.
FROM ubuntu:20.04

# Image metadata, declared together in a single LABEL instruction.
LABEL dlc_major_version="1" \
      maintainer="Amazon AI" \
      com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"

# Build-time arguments for the Python toolchain and the model server.
#   PYTHON          - interpreter name passed to generate_oss_compliance.sh
#   PYTHON_VERSION  - exact Python version installed into the conda environment
#   MMS_VERSION     - pinned multi-model-server release installed with pip
#   MAMBA_VERSION   - Mambaforge installer release downloaded from GitHub
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12
ARG MMS_VERSION=1.1.11
ARG MAMBA_VERSION=23.1.0-4

# Neuron SDK components version numbers
# The first four pin pip packages (torch-neuronx, neuronx_distributed,
# neuronx-cc, transformers-neuronx); the last three pin apt packages
# (aws-neuronx-collectives, aws-neuronx-runtime-lib, aws-neuronx-tools).
ARG NEURONX_FRAMEWORK_VERSION=2.1.2.2.3.0
ARG NEURONX_DISTRIBUTED_VERSION=0.9.0
ARG NEURONX_CC_VERSION=2.15.128.0
ARG NEURONX_TRANSFORMERS_VERSION=0.12.313
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.22.26.0-17a033bc8
ARG NEURONX_RUNTIME_LIB_VERSION=2.22.14.0-6e27b8d5b
ARG NEURONX_TOOLS_VERSION=2.19.0.0

# HF ARGS
# Versions of the Hugging Face libraries installed with pip further down.
# TRANSFORMERS_VERSION has no default in this file, so it is expected to be
# supplied by the build (e.g. --build-arg TRANSFORMERS_VERSION=...).
ARG TRANSFORMERS_VERSION
ARG DIFFUSERS_VERSION=0.28.2
ARG OPTIMUM_NEURON_VERSION=0.0.25
ARG SENTENCE_TRANSFORMERS=3.0.1
ARG PEFT_VERSION=0.12.0
ARG DATASETS_VERSION=2.19.0

# Runtime environment. Every variable uses the key=value form; the legacy
# space-separated "ENV key value" syntax is deprecated.
# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Put the Neuron and conda shared libraries ahead of any inherited search path.
ENV LD_LIBRARY_PATH=/opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
# Put the conda and Neuron executables ahead of the inherited PATH.
ENV PATH=/opt/conda/bin:/opt/aws/neuron/bin:$PATH
ENV SAGEMAKER_SERVING_MODULE=sagemaker_pytorch_serving_container.serving:main
# Temp directory under the model-server home; the directory itself is created
# later in this file when the model-server user is added.
ENV TEMP=/home/model-server/tmp

# Upgrade the base image and install OS-level build tools, libraries, editors
# and the OpenJDK 11 JDK (from the openjdk-r PPA).
# DEBIAN_FRONTEND is exported only inside this RUN so package configuration
# never waits on a prompt and the setting does not leak into the runtime
# environment; add-apt-repository gets -y for the same reason.
# The apt caches are removed in the same layer that created them.
RUN export DEBIAN_FRONTEND=noninteractive \
 && apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y --no-install-recommends software-properties-common \
 && add-apt-repository -y ppa:openjdk-r/ppa \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
    apt-transport-https \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    emacs \
    git \
    gpg-agent \
    jq \
    libcap-dev \
    libexpat1 \
    libgl1-mesa-glx \
    libgssapi-krb5-2 \
    libsm6 \
    libxext6 \
    libxml2 \
    libxrender-dev \
    openjdk-11-jdk \
    unzip \
    vim \
    wget \
    zlib1g-dev \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /tmp/tmp*

# Register the AWS Neuron apt repository for Ubuntu focal.
RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
# Trust the repository signing key. The key is downloaded to a file first so
# that a failed download stops the build on wget's own exit status instead of
# being hidden behind a pipe (the default shell runs without pipefail).
RUN wget -qO /tmp/neuron-repo.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
 && apt-key add /tmp/neuron-repo.pub \
 && rm -f /tmp/neuron-repo.pub

# Install the Neuronx system packages (tools, collectives, runtime library)
# at the exact versions given by the NEURONX_* build arguments, then drop the
# apt caches within the same layer.
RUN apt-get update \
 && apt-get install -y \
    "aws-neuronx-tools=${NEURONX_TOOLS_VERSION}" \
    "aws-neuronx-collectives=${NEURONX_COLLECTIVES_LIB_VERSION}" \
    "aws-neuronx-runtime-lib=${NEURONX_RUNTIME_LIB_VERSION}" \
 && rm -rf /var/lib/apt/lists/* /tmp/tmp* \
 && apt-get clean

# Java CA-certificate keystore workaround.
# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
# Steps: re-import the existing cacerts keystore into a JKS-type store, move
# that store over the original cacerts file, then run the
# ca-certificates-java post-install script with "configure".
# NOTE(review): the commands are joined with ';' rather than '&&', so only the
# exit status of the last command decides whether this RUN step fails —
# confirm that tolerating failures of the earlier commands is intended.
RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
/var/lib/dpkg/info/ca-certificates-java.postinst configure;

# Download the Mambaforge installer for MAMBA_VERSION, install it into
# /opt/conda, delete the installer, then provision the conda environment with
# the pinned Python version and the base scientific packages.
RUN installer="${HOME}/mambaforge.sh" \
 && conda_bin=/opt/conda/bin/conda \
 && curl -L -o "${installer}" "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh" \
 && chmod +x "${installer}" \
 && "${installer}" -b -p /opt/conda \
 && rm "${installer}" \
 && "${conda_bin}" update -y conda \
 && "${conda_bin}" install -c conda-forge -y \
    "python=${PYTHON_VERSION}" \
    pyopenssl \
    cython \
    mkl-include \
    mkl \
    botocore \
    parso \
    scipy \
    typing \
    # Below 2 are included in miniconda base, but not mamba so need to install
    conda-content-trust \
    charset-normalizer \
 && "${conda_bin}" update -y conda \
 && "${conda_bin}" clean -ya

# Install extra conda-forge packages, upgrade pip, expose it as pip3 and add a
# few helper Python packages.
# -y makes the conda step explicitly non-interactive, and --no-cache-dir keeps
# pip's download cache out of the layer.
RUN conda install -y -c conda-forge \
    scikit-learn \
    h5py \
    requests \
 && conda clean -ya \
 && pip install --no-cache-dir --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
 && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
 && pip install --no-cache-dir \
    packaging \
    enum-compat \
    ipython

# Install/upgrade common Python libraries.
# Every requirement that contains a shell metacharacter is quoted: unquoted,
# "opencv-python>=4.8.1.78" is parsed by the shell as "opencv-python" with
# stdout redirected to a file named "=4.8.1.78" (dropping the version bound
# and pip's output), and "pandas==1.*" is subject to glob expansion.
RUN pip install --no-cache-dir -U \
    "opencv-python>=4.8.1.78" \
    "numpy>=1.22.2, <1.24" \
    "scipy>=1.8.0" \
    six \
    "pillow>=10.0.1" \
    "awscli<2" \
    "pandas==1.*" \
    boto3 \
    cryptography

# Install Neuronx-cc and PyTorch
# The Neuron packages come from the Neuron pip repository at the versions
# pinned by the NEURONX_* build arguments. protobuf is then constrained to
# >=3.18.3,<4 and torchvision 0.16.* is installed without its dependencies.
# All three pip calls use --no-cache-dir so downloaded wheels are not kept in
# the layer; requirements containing '*', '<' or '>' are quoted so the shell
# passes them to pip unchanged.
RUN pip install --no-cache-dir --extra-index-url https://pip.repos.neuron.amazonaws.com \
    "neuronx-cc==${NEURONX_CC_VERSION}" \
    "torch-neuronx==${NEURONX_FRAMEWORK_VERSION}" \
    "neuronx_distributed==${NEURONX_DISTRIBUTED_VERSION}" \
    "transformers-neuronx==${NEURONX_TRANSFORMERS_VERSION}" \
 && pip install --no-cache-dir "protobuf>=3.18.3,<4" \
 && pip install --no-deps --no-cache-dir -U "torchvision==0.16.*"

# Work from the filesystem root for the remaining steps.
WORKDIR /

# Model server: multi-model-server at the pinned MMS_VERSION plus the
# SageMaker inference toolkit.
RUN pip install --no-cache-dir \
    "multi-model-server==${MMS_VERSION}" \
    sagemaker-inference

# Create the model-server account with a home directory, add the tmp directory
# that TEMP points to, and hand the whole home tree to that account.
RUN useradd -m model-server \
 && mkdir -p /home/model-server/tmp \
 && chown -R model-server /home/model-server

# Copy the entrypoint script, the neuron-monitor helper script and
# config.properties (installed as /etc/sagemaker-mms.properties) from the
# build context into the image.
COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
COPY config.properties /etc/sagemaker-mms.properties

# Mark the two copied scripts as executable.
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
&& chmod +x /usr/local/bin/neuron-monitor.sh

# Fetch deep_learning_container.py from the deep-learning-containers repository.
# NOTE(review): the URL tracks the master branch and no checksum is given, so
# the downloaded content can differ between builds — consider pinning to a
# commit and/or verifying a checksum.
ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py

# Mark the downloaded script as executable.
RUN chmod +x /usr/local/bin/deep_learning_container.py

#################################
# Hugging Face specific section #
#################################

# Download the license text into the image root.
# --fail makes curl exit non-zero on an HTTP error instead of silently saving
# the server's error document as /license.txt.
# NOTE(review): the URL path says pytorch-1.13 — confirm this is the intended
# license file for this image.
RUN curl --fail https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.13/license.txt -o /license.txt

# install Hugging Face libraries and its dependencies
# TRANSFORMERS_VERSION has no default, so the ${VAR:?} expansion stops the
# build with a clear message when it was not supplied. Requirements are quoted
# so the extras brackets and version operators reach pip untouched by the
# shell (unquoted [...] is a glob pattern).
RUN pip install --no-cache-dir \
    "transformers[sentencepiece,audio,vision]==${TRANSFORMERS_VERSION:?TRANSFORMERS_VERSION build argument is required}" \
    "diffusers==${DIFFUSERS_VERSION}" \
    "datasets==${DATASETS_VERSION}" \
    "optimum-neuron==${OPTIMUM_NEURON_VERSION}" \
    "sentence_transformers==${SENTENCE_TRANSFORMERS}" \
    "peft==${PEFT_VERSION}" \
    "sagemaker-huggingface-inference-toolkit>=2.4.0,<3"

# Re-apply the pillow lower bound and hold requests below 2.32.0 after the
# Hugging Face install above.
RUN pip install --no-cache-dir -U \
    "pillow>=10.0.1" \
    "requests<2.32.0"

# OSS compliance: download and unpack the compliance bundle under /root,
# install its testOSSCompliance helper into /usr/local/bin, run the generator
# script with the root directory and the Python interpreter name, then remove
# the bundle and the leftover conda cache directory.
RUN oss_root=/root \
 && curl -o "${oss_root}/oss_compliance.zip" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
 && unzip "${oss_root}/oss_compliance.zip" -d "${oss_root}/" \
 && cp "${oss_root}/oss_compliance/test/testOSSCompliance" /usr/local/bin/testOSSCompliance \
 && chmod +x /usr/local/bin/testOSSCompliance \
 && chmod +x "${oss_root}/oss_compliance/generate_oss_compliance.sh" \
 && "${oss_root}/oss_compliance/generate_oss_compliance.sh" "${oss_root}" "${PYTHON}" \
 && rm -rf "${oss_root}"/oss_compliance* \
 # conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
 && rm -rf "${oss_root}/.cache/conda"

# Document the two ports the container listens on (EXPOSE does not publish them).
EXPOSE 8080 8081
# Start the container through the copied entrypoint script using exec form;
# "serve" is the default argument and can be overridden at run time.
# NOTE(review): no USER instruction is set, so the entrypoint runs as root even
# though a model-server account is created earlier — confirm this is intended.
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["serve"]
Loading
Loading