Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Add docker config and scripts for Wolfi images #2675

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions dev-tools/docker/build_linux_wolfi_aarch64_build_image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.
#

# Builds the Wolfi-based Docker image that can be used to compile the machine
# learning C++ code natively for Linux on aarch64.
#
# This script is not intended to be run regularly. When changing the tools
# or 3rd party components required to build the machine learning C++ code
# increment the version, change the Dockerfile and build a new image to be
# used for subsequent builds on this branch. Then update the version to be
# used for builds in docker/linux_builder/Dockerfile.

# Native images cannot be cross-built by this script, so refuse to continue
# unless "uname -m" reports a 64-bit ARM machine (aarch64 or arm64).
HARDWARE_ARCH=$(uname -m)
if [[ "$HARDWARE_ARCH" != aarch64 && "$HARDWARE_ARCH" != arm64 ]]; then
  echo "Native build images must be built on the correct hardware architecture" >&2
  echo "Required: aarch64 or arm64, Current: $HARDWARE_ARCH" >&2
  exit 1
fi

# Advisory only: report the free space in Docker's root directory and pause
# briefly so the operator can abort. This runs before "set -e" on purpose, so
# a failure to query Docker here does not by itself stop the script.
DOCKER_DIR=$(docker info 2>/dev/null | grep '^ *Docker Root Dir' | awk -F: '{ print $2 }' | sed 's/^ *//')
echo "Building this image may require up to 50GB of space for Docker"
if [[ -n "$DOCKER_DIR" ]]; then
  echo "Current space available in $DOCKER_DIR"
  df -h "$DOCKER_DIR"
else
  # Without this guard df would be invoked with an empty path
  echo "Could not determine the Docker root directory - is Docker running?" >&2
fi
sleep 5

# Coordinates of the image that is built and pushed
HOST=docker.elastic.co
ACCOUNT=ml-dev
REPOSITORY=ml-linux-wolfi-aarch64-build
VERSION=1

# From here on any failing command aborts the script
set -e

# The helper script and the build context are addressed relative to the
# directory containing this script. Quoted so paths with spaces work.
cd "$(dirname "$0")"

. ./prefetch_docker_image.sh
CONTEXT=linux_wolfi_aarch64_image
prefetch_docker_base_image "$CONTEXT/Dockerfile"
docker build --no-cache -t "$HOST/$ACCOUNT/$REPOSITORY:$VERSION" "$CONTEXT"
# Get a username and password for this by visiting
# https://docker-auth.elastic.co and allowing it to authenticate against your
# GitHub account
docker login "$HOST"
docker push "$HOST/$ACCOUNT/$REPOSITORY:$VERSION"

52 changes: 52 additions & 0 deletions dev-tools/docker/build_linux_wolfi_build_image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.
#

# Builds the Wolfi-based Docker image that can be used to compile the machine
# learning C++ code natively for Linux on x86_64.
#
# This script is not intended to be run regularly. When changing the tools
# or 3rd party components required to build the machine learning C++ code
# increment the version, change the Dockerfile and build a new image to be
# used for subsequent builds on this branch. Then update the version to be
# used for builds in docker/linux_builder/Dockerfile.

# Native images cannot be cross-built by this script, so refuse to continue
# unless "uname -m" reports an x86_64 machine.
HARDWARE_ARCH=$(uname -m)
if [[ "$HARDWARE_ARCH" != x86_64 ]]; then
  echo "Native build images must be built on the correct hardware architecture" >&2
  echo "Required: x86_64, Current: $HARDWARE_ARCH" >&2
  exit 1
fi

# Advisory only: report the free space in Docker's root directory and pause
# briefly so the operator can abort. This runs before "set -e" on purpose, so
# a failure to query Docker here does not by itself stop the script.
DOCKER_DIR=$(docker info 2>/dev/null | grep '^ *Docker Root Dir' | awk -F: '{ print $2 }' | sed 's/^ *//')
echo "Building this image may require up to 50GB of space for Docker"
if [[ -n "$DOCKER_DIR" ]]; then
  echo "Current space available in $DOCKER_DIR"
  df -h "$DOCKER_DIR"
else
  # Without this guard df would be invoked with an empty path
  echo "Could not determine the Docker root directory - is Docker running?" >&2
fi
sleep 5

# Coordinates of the image that is built and pushed
HOST=docker.elastic.co
ACCOUNT=ml-dev
REPOSITORY=ml-linux-wolfi-build
VERSION=1

# From here on any failing command aborts the script
set -e

# The helper script and the build context are addressed relative to the
# directory containing this script. Quoted so paths with spaces work.
cd "$(dirname "$0")"

. ./prefetch_docker_image.sh
CONTEXT=linux_wolfi_image
prefetch_docker_base_image "$CONTEXT/Dockerfile"
docker build --no-cache -t "$HOST/$ACCOUNT/$REPOSITORY:$VERSION" "$CONTEXT"
# Get a username and password for this by visiting
# https://docker-auth.elastic.co and allowing it to authenticate against your
# GitHub account
docker login "$HOST"
docker push "$HOST/$ACCOUNT/$REPOSITORY:$VERSION"

173 changes: 173 additions & 0 deletions dev-tools/docker/linux_wolfi_aarch64_image/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.
#

# Stage 1 ("builder"): compiles the toolchain and third-party libraries.
# Only /usr/local/gcc103 is copied out of this stage by the final stage at
# the bottom of this file.
FROM cgr.dev/chainguard/wolfi-base:latest AS builder

# This is basically automating the setup instructions in build-setup/linux.md

# NOTE(review): the e-mail address below reads like a redaction placeholder
# in this copy of the file - confirm the real address is present.
# NOTE(review): MAINTAINER is listed as deprecated in the Dockerfile
# reference in favour of a LABEL - consider switching.
MAINTAINER Ed Savage <[email protected]>

# Refresh the package index, then install the tools and headers needed by the
# source builds below. libffi-dev is there for the Python build.
RUN \
  apk update && \
  apk add \
    bash \
    build-base \
    bzip2 \
    gcc \
    git \
    libffi-dev \
    make \
    texinfo \
    unzip \
    wget \
    xz \
    zip \
    zlib-dev

# For compiling with hardening and optimisation
# These are inherited by every RUN step that follows in this stage. The
# key=value form is used because the whitespace-separated "ENV key value"
# form is a legacy syntax.
ENV CFLAGS="-g -O3 -fstack-protector -D_FORTIFY_SOURCE=2 -march=armv8-a+crc+crypto"
ENV CXXFLAGS="-g -O3 -fstack-protector -D_FORTIFY_SOURCE=2 -march=armv8-a+crc+crypto"
ENV LDFLAGS="-Wl,-z,relro -Wl,-z,now"
ENV LDFLAGS_FOR_TARGET="-Wl,-z,relro -Wl,-z,now"

# Directory in which sources are unpacked and built; can be overridden with
# --build-arg build_dir=<path>
ARG build_dir=/usr/src

RUN mkdir -p "${build_dir}"

# Build gcc 10.3
# Steps performed by the RUN below, in order:
#  - stream the gcc 10.3.0 tarball from the GNU mirror straight into tar
#  - run contrib/download_prerequisites, a helper script shipped in the gcc
#    source tree
#  - first sed: prefix $(SHLIB_LDFLAGS) in libgcc/config/t-slibgcc with
#    "-Wl,-z,relro -Wl,-z,now", so those linker flags are also applied there
#  - second sed: cut lines 59-65 of
#    libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp and
#    re-insert them after line 84; presumably a header-ordering workaround
#    for this base image - TODO confirm the reason
#  - configure in a separate build directory for C and C++ only, with
#    /usr/local/gcc103 as the install prefix, then build and install
#  - delete the source and build trees afterwards
RUN \
cd ${build_dir} && \
wget --quiet -O - http://ftpmirror.gnu.org/gcc/gcc-10.3.0/gcc-10.3.0.tar.gz | tar zxf - && \
cd gcc-10.3.0 && \
contrib/download_prerequisites && \
sed -i -e 's/$(SHLIB_LDFLAGS)/-Wl,-z,relro -Wl,-z,now $(SHLIB_LDFLAGS)/' libgcc/config/t-slibgcc && \
sed -i '59,65{H;d;};84G' ../gcc-10.3.0/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp && \
cd .. && \
mkdir gcc-10.3.0-build && \
cd gcc-10.3.0-build && \
../gcc-10.3.0/configure --prefix=/usr/local/gcc103 --enable-languages=c,c++ --enable-vtable-verify --with-system-zlib --disable-multilib && \
make -j`nproc` && \
make install && \
cd .. && \
rm -rf gcc-10.3.0 gcc-10.3.0-build

# Update paths to use the newly built compiler in C++17 mode
# /usr/local/gcc103 is placed first so the freshly built toolchain and its
# runtime libraries are found ahead of the ones from the base image. The
# key=value form replaces the legacy whitespace-separated ENV syntax.
ENV LD_LIBRARY_PATH=/usr/local/gcc103/lib64:/usr/local/gcc103/lib:/usr/lib:/lib
ENV PATH=/usr/local/gcc103/bin:/usr/bin:/bin:/usr/sbin:/sbin
ENV CXX="g++ -std=gnu++17"

# Build binutils 2.37 and install it under the same prefix as the compiler.
# The source tree is removed once the install has finished.
RUN \
  cd ${build_dir} && \
  wget --quiet -O - http://ftpmirror.gnu.org/binutils/binutils-2.37.tar.bz2 | tar -xjf - && \
  cd binutils-2.37 && \
  ./configure \
    --prefix=/usr/local/gcc103 \
    --enable-vtable-verify \
    --with-system-zlib \
    --disable-libstdcxx \
    --with-gcc-major-version-only && \
  make -j$(nproc) && \
  make install && \
  cd .. && \
  rm -rf binutils-2.37

# Build libxml2
# The tarball is fetched over HTTPS with certificate verification left on:
# this source is compiled into the image, so it must not be accepted from an
# unauthenticated server. Python and readline support are switched off.
RUN \
  cd ${build_dir} && \
  wget --quiet -O - https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.tar.xz | tar Jxf - && \
  cd libxml2-2.9.14 && \
  ./configure --prefix=/usr/local/gcc103 --without-python --without-readline && \
  make -j$(nproc) && \
  make install && \
  cd .. && \
  rm -rf libxml2-2.9.14

# Build Boost
# Steps performed by the RUN below, in order:
#  - stream the Boost 1.83.0 tarball into tar
#  - bootstrap without the context, coroutine, graph_parallel, mpi and python
#    libraries and without ICU
#  - sed: insert (3ul) at the front of the prime list that starts with
#    (13ul) in boost/unordered/detail/prime_fmod.hpp; presumably to allow a
#    smaller minimum bucket count for unordered containers - TODO confirm
#  - b2 is run twice with the same feature set: once to build in parallel and
#    once to install into /usr/local/gcc103. The option lists on the two
#    lines must be kept in sync by hand.
#  - delete the source tree afterwards
RUN \
cd ${build_dir} && \
wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.83.0/source/boost_1_83_0.tar.bz2 | tar jxf - && \
cd boost_1_83_0 && \
./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu && \
sed -i -e 's|(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|(3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|' boost/unordered/detail/prime_fmod.hpp && \
./b2 -j`nproc` --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -march=armv8-a+crc+crypto' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \
./b2 install --prefix=/usr/local/gcc103 --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -march=armv8-a+crc+crypto' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \
cd .. && \
rm -rf boost_1_83_0

# Build patchelf 0.13 and install it under the toolchain prefix.
# Note that the release tarball unpacks into a directory whose name carries
# a date and commit suffix rather than just the version number.
RUN \
  cd ${build_dir} && \
  wget --quiet -O - https://github.com/NixOS/patchelf/releases/download/0.13/patchelf-0.13.tar.bz2 | tar -xjf - && \
  cd patchelf-0.13.20210805.a949ff2 && \
  ./configure --prefix=/usr/local/gcc103 && \
  make -j$(nproc) && \
  make install && \
  cd .. && \
  rm -rf patchelf-0.13.20210805.a949ff2

# Build OpenSSL 1.1.1
# This is only needed as a dependency for Python 3.10 during the PyTorch build
# Not using --prefix=/usr/local/gcc103 so that this can be excluded from the final image
# The tarball is fetched with certificate verification left on: a TLS library
# built from an unauthenticated download would defeat its own purpose.
RUN \
  cd ${build_dir} && \
  wget --quiet -O - https://www.openssl.org/source/old/1.1.1/openssl-1.1.1q.tar.gz | tar xzf - && \
  cd openssl-1.1.1q && \
  ./Configure --prefix=/usr/local shared linux-aarch64 && \
  make -j$(nproc) && \
  make install && \
  cd .. && \
  rm -rf openssl-1.1.1q

# Build Python 3.10.9 against the OpenSSL installed in /usr/local.
# --enable-optimizations gives a stable/release build. No --prefix is passed,
# keeping Python out of /usr/local/gcc103 and therefore out of the final
# image. "altinstall" is the make target used for the install.
RUN \
  cd ${build_dir} && \
  wget --quiet -O - https://www.python.org/ftp/python/3.10.9/Python-3.10.9.tgz | tar -xzf - && \
  cd Python-3.10.9 && \
  ./configure \
    --enable-optimizations \
    --with-openssl=/usr/local \
    --with-openssl-rpath=/usr/local/lib && \
  make -j$(nproc) && \
  make altinstall && \
  cd .. && \
  rm -rf Python-3.10.9

# Python packages installed with the pip that belongs to the Python 3.10
# built above.
RUN \
  /usr/local/bin/pip3.10 install \
    numpy \
    pyyaml \
    setuptools \
    cffi \
    typing_extensions \
    future \
    six \
    requests \
    dataclasses

# Install the prebuilt CMake 3.23.2 for aarch64 into the toolchain prefix
# (v3.19.2 is the minimum required). The self-extracting installer is
# downloaded, made executable, run non-interactively and then deleted.
RUN \
  cd ${build_dir} && \
  installer=cmake-3.23.2-Linux-aarch64.sh && \
  wget --quiet https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-Linux-aarch64.sh && \
  chmod +x ${installer} && \
  ./${installer} --skip-license --prefix=/usr/local/gcc103 && \
  rm -f ${installer}

# Clone PyTorch and build LibTorch
# If the PyTorch branch is changed also update PYTORCH_BUILD_VERSION
# Steps performed by the RUN below, in order:
#  - shallow-clone the v2.1.2 tag and fetch all submodules recursively
#  - sed: replace the text "system(" with "strlen(" in
#    torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp; presumably so the
#    resulting library cannot spawn external commands - TODO confirm
#  - export the build switches read by setup.py: Eigen as BLAS, MKLDNN on,
#    tests, FBGEMM, Kineto, distributed and both QNNPACK variants off, and at
#    most 5 parallel jobs
#  - run setup.py install with the Python 3.10 built earlier
#  - copy the headers to /usr/local/gcc103/include/pytorch and the two shared
#    libraries libtorch_cpu.so and libc10.so to /usr/local/gcc103/lib
#  - delete the clone afterwards
RUN \
cd ${build_dir} && \
git -c advice.detachedHead=false clone --depth=1 --branch=v2.1.2 https://github.com/pytorch/pytorch.git && \
cd pytorch && \
git submodule sync && \
git submodule update --init --recursive && \
sed -i -e 's/system(/strlen(/' torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp && \
export BLAS=Eigen && \
export BUILD_TEST=OFF && \
export USE_FBGEMM=OFF && \
export USE_KINETO=OFF && \
export USE_DISTRIBUTED=OFF && \
export USE_MKLDNN=ON && \
export USE_QNNPACK=OFF && \
export USE_PYTORCH_QNNPACK=OFF && \
export PYTORCH_BUILD_VERSION=2.1.2 && \
export PYTORCH_BUILD_NUMBER=1 && \
export MAX_JOBS=5 && \
/usr/local/bin/python3.10 setup.py install && \
mkdir /usr/local/gcc103/include/pytorch && \
cp -r torch/include/* /usr/local/gcc103/include/pytorch/ && \
cp torch/lib/libtorch_cpu.so /usr/local/gcc103/lib && \
cp torch/lib/libc10.so /usr/local/gcc103/lib && \
cd .. && \
rm -rf pytorch

# Final stage: start again from a clean base image and take only the
# /usr/local/gcc103 tree from the builder stage.
# NOTE(review): ENV values set in the builder stage (PATH, LD_LIBRARY_PATH,
# CXX, ...) are not inherited by a new FROM - confirm that whatever consumes
# this image sets them itself.
FROM cgr.dev/chainguard/wolfi-base:latest
COPY --from=builder /usr/local/gcc103 /usr/local/gcc103
# --no-cache uses an up-to-date package index without storing it in the
# image, so the published layer does not carry the index files.
RUN \
  apk add --no-cache bash build-base bzip2 gcc git make unzip zip zlib-dev

Loading