-
Notifications
You must be signed in to change notification settings - Fork 476
/
Dockerfile
88 lines (64 loc) · 3.24 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#---
# name: text-generation-inference
# group: llm
# depends: [pytorch, torchvision, bitsandbytes, transformers, rust]
# requires: '>=34.1.0'
# notes: https://github.com/huggingface/text-generation-inference
#---
# BASE_IMAGE has no default here, so it has to be supplied with --build-arg.
# NOTE(review): presumably an image that already provides the packages listed
# under `depends:` in the header above - confirm against the build system.
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
# relative paths in the steps that follow resolve against /opt
WORKDIR /opt
# install protoc (we don't need the full version from protobuf:cpp,
# and the version from protobuf:apt is too old - 21.12 is in the HF docs)
#
# PROTOC_VERSION selects the release. PROTOC_URL and PROTOC_ZIP default to the
# matching linux-aarch_64 archive and can still be overridden individually
# with --build-arg, exactly as before.
ARG PROTOC_VERSION=21.12
ARG PROTOC_URL=https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-aarch_64.zip
ARG PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-aarch_64.zip

# The archive is fetched with normal TLS certificate verification: it is an
# executable that ends up in /usr/local/bin, and the later `git clone https://...`
# and pip steps already rely on a working CA bundle in the base image.
# Only the compiler binary and the bundled include/ tree are extracted; the
# archive is removed in the same layer so it never reaches the image, and the
# build fails right here if protoc is not on PATH or cannot run.
RUN wget --quiet --show-progress --progress=bar:force:noscroll "${PROTOC_URL}" -O "${PROTOC_ZIP}" && \
    unzip -o "${PROTOC_ZIP}" -d /usr/local bin/protoc && \
    unzip -o "${PROTOC_ZIP}" -d /usr/local 'include/*' && \
    rm "${PROTOC_ZIP}" && \
    which protoc && \
    protoc --version
# Some makefiles invoke plain 'python' / 'pip'; register the python3 / pip3
# executables under those names (priority 1). `set -e` aborts the step on the
# first failing registration.
RUN set -e; \
    for tool in python pip; do \
        update-alternatives --install "/usr/bin/${tool}" "${tool}" "/usr/bin/${tool}3" 1; \
    done
# Shallow-clone (latest commit only) the upstream repository into the current
# working directory.
RUN git clone --depth 1 https://github.com/huggingface/text-generation-inference

WORKDIR /opt/text-generation-inference/server

# Replace the exact bitsandbytes pin in the server requirements with an
# unpinned entry.
RUN sed -i 's|^bitsandbytes==.*|bitsandbytes|g' requirements.txt

# Build-time switch, visible to the RUN steps from here on.
ARG BUILD_EXTENSIONS=True

# Print the build environment to the log.
RUN env

# NOTE(review): presumably generates the gRPC/protobuf server stubs with the
# protoc installed earlier - confirm in the server Makefile.
RUN make gen-server
# Loosen requirements.txt before installing it:
#   1. markers requiring python_version >= "3.9" are lowered to >= "3.8"
#   2. every '==<version>' pin followed by ' ;' is stripped, leaving ' ;'
# The patched file is echoed to the build log.
#
# Disabled, kept for reference (per-package unpinning):
#   sed 's|^scipy.*|scipy|g' -i requirements.txt
#   sed 's|^opentelemetry-api.*|opentelemetry-api|g' -i requirements.txt
RUN sed -i \
        -e 's|python_version >= "3.9"|python_version >= "3.8"|g' \
        -e 's|==.* ;|\ ;|g' \
        requirements.txt && \
    cat requirements.txt

RUN pip3 install --verbose --no-cache-dir -r requirements.txt
# Relax the constraints declared in pyproject.toml before the editable install:
#   - the python requirement becomes "^3.8"
#   - the protobuf, grpcio*, opentelemetry-* and scipy entries become "*"
# NOTE(review): presumably so the versions already installed in the image
# satisfy the package metadata - confirm.
# All substitutions run in a single sed pass (one process, one rewrite of the
# file); the patched file is echoed to the build log.
RUN sed -i \
        -e 's|^python = .*|python = "^3.8"|' \
        -e 's|^protobuf = .*|protobuf = "*"|' \
        -e 's|^grpcio = .*|grpcio = "*"|' \
        -e 's|^grpcio-status = .*|grpcio-status = "*"|' \
        -e 's|^grpcio-reflection = .*|grpcio-reflection = "*"|' \
        -e 's|^opentelemetry-api = .*|opentelemetry-api = "*"|' \
        -e 's|^opentelemetry-exporter-otlp = .*|opentelemetry-exporter-otlp = "*"|' \
        -e 's|^opentelemetry-instrumentation-grpc = .*|opentelemetry-instrumentation-grpc = "*"|' \
        -e 's|^scipy = .*|scipy = "*"|' \
        pyproject.toml && \
    cat pyproject.toml

# Editable install of the server with the bnb and accelerate extras.
# --no-deps: dependencies are not resolved here (requirements.txt was installed
# in an earlier step). --no-cache-dir matches the other pip invocations in this
# file and keeps pip's cache out of the layer; --verbose is passed once.
RUN pip3 install --no-cache-dir --verbose --no-deps -e ".[bnb, accelerate]"
# Back to the repository root for the remaining make targets.
WORKDIR /opt/text-generation-inference/

# OpenSSL development files; the apt package lists are removed in the same
# layer. NOTE(review): presumably needed by the router/launcher builds below -
# confirm.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends libssl-dev && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

RUN make install-router

RUN make install-launcher
# No default: only set when passed with --build-arg.
# NOTE(review): presumably the CUDA architectures the kernels are compiled for.
ARG TORCH_CUDA_ARCH_LIST

# Print the build environment (now including the ARG above, if set) to the log.
RUN env

# Blank out the hard-coded "-arch=compute_80", entry in the custom kernels'
# setup.py, then echo the patched file to the build log.
RUN sed -i 's|\"-arch=compute_80\",| |g' server/custom_kernels/setup.py && \
    cat server/custom_kernels/setup.py

RUN make install-custom-kernels
# text-generation client library
RUN pip3 install --verbose --no-cache-dir text-generation

# An earlier install step overwrote bitsandbytes; put back the wheel(s)
# matching /opt/bitsandbytes*.whl.
RUN pip3 install --verbose --no-cache-dir /opt/bitsandbytes*.whl

# Smoke test: the build fails if the server CLI cannot start far enough to
# print its help text.
RUN cd /opt/text-generation-inference/server/text_generation_server && \
    python3 cli.py --help