Dockerfile.ollama
# Dockerfile for Ollama API server with Qwen model
# Build: docker build -t ollama-qwen:0.1 -f Dockerfile.ollama .
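# Run (example; flags are a sketch, adjust ports and names to your setup):
#   docker run --rm -p 11434:11434 ollama-qwen:0.1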
# Credit: DevFest Pwani 2024 (Kenya), presentation "Inference Your LLMs on the Fly:
# Serverless Cloud Run with GPU Acceleration" by Jochen Kirstaetter
# https://jochen.kirstaetter.name/
FROM ollama/ollama:0.3.3
# Metadata
LABEL maintainer="Shuyib" \
      description="Ollama API server with Qwen model" \
      version="0.1"
# Create a non-root system user to run the server
RUN adduser --system --group ollama
# Set workdir
WORKDIR /app
# Set environment variables:
#   OLLAMA_HOST       listen on all interfaces, port 11434
#   OLLAMA_MODELS     store models under /models
#   OLLAMA_DEBUG      disable debug logging
#   OLLAMA_KEEP_ALIVE -1 keeps the loaded model in memory indefinitely
#   MODEL             model to pull at build time
ENV OLLAMA_HOST=0.0.0.0:11434 \
    OLLAMA_MODELS=/models \
    OLLAMA_DEBUG=false \
    OLLAMA_KEEP_ALIVE=-1 \
    MODEL=qwen2.5:0.5b
# Create models directory
RUN mkdir -p /models && \
    chown -R ollama:ollama /models
# Switch to the ollama user for the remaining steps and at runtime
USER ollama
# Pull the model at build time: start the server in the background, give it a
# few seconds to come up, then pull $MODEL so it is baked into the image layer
RUN ollama serve & sleep 5 && ollama pull $MODEL
# Expose port
EXPOSE 11434
# Healthcheck: probe the version endpoint (assumes curl is available in the
# base image)
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:11434/api/version > /dev/null && echo "Ollama is healthy" || exit 1
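# To read the reported health status from the host (example; replace
# <container> with your container name or ID):
#   docker inspect --format='{{.State.Health.Status}}' <container>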
# Entrypoint: ollama serve
ENTRYPOINT ["ollama", "serve"]
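# Example request against the running server (sketch; the model name must
# match MODEL above and the port must be published as shown in the run example):
#   curl http://localhost:11434/api/generate \
#     -d '{"model": "qwen2.5:0.5b", "prompt": "Hello", "stream": false}'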