Skip to content

Commit b8b6c9e

Browse files
author
Work Flow Manager
committed
Add official Firecrawl Docker setup
- Created Dockerfile.official following SELF_HOST.md instructions - Added docker-compose.production.yml with all services - Configured for Gemini + Ollama hybrid mode - Includes Redis, Playwright, API and Worker services - Ready for production deployment
1 parent e9e00c8 commit b8b6c9e

File tree

4 files changed

+389
-0
lines changed

4 files changed

+389
-0
lines changed

Dockerfile.all-in-one

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
# syntax=docker/dockerfile:1
# Firecrawl All-in-One Production Image
# Includes: Redis, Playwright, API and Workers, all run under supervisord.
#
# SECURITY: no credentials are baked into this image. Anything placed in ENV,
# ARG or a COPY'd .env file is stored in the image layers and is readable via
# `docker history` / `docker inspect` by anyone who can pull the image.
# Supply secrets at runtime instead, for example:
#
#   docker run --env-file .env.production -p 3002:3002 <image>
#
# Runtime variables expected from the operator:
#   GEMINI_API_KEY, TEST_API_KEY                      (required by this setup)
#   PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD      (optional)
#   WEBSHARE_API_KEY                                  (optional)
FROM node:20-slim

# Branch or tag of the Firecrawl repository to build. Override with
# `--build-arg FIRECRAWL_REF=<tag>` to get reproducible builds; the default
# keeps the previous behaviour of building the current default branch.
ARG FIRECRAWL_REF=main

# Install OS dependencies (sorted; no recommended extras to keep the image small)
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        gnupg \
        python3 \
        python3-pip \
        redis-server \
        supervisor \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install Chrome for Playwright.
# The signing key is stored in a dedicated keyring referenced via `signed-by`
# instead of the deprecated `apt-key add`; this also removes the unchecked
# `wget | apt-key` pipe, whose download failures were silently ignored.
RUN install -d -m 0755 /etc/apt/keyrings \
    && wget -q -O /etc/apt/keyrings/google-chrome.asc https://dl-ssl.google.com/linux/linux_signing_key.pub \
    && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/google-chrome.asc] https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends google-chrome-stable \
    && rm -rf /var/lib/apt/lists/*

# Clone Firecrawl at the requested ref (shallow: history is not needed to build)
WORKDIR /app
RUN git clone --depth 1 --branch "${FIRECRAWL_REF}" https://github.com/mendableai/firecrawl.git .

# Install API dependencies and the browser OS libraries.
# `--with-deps` runs apt-get internally, so the package lists are removed again
# in the same layer.
WORKDIR /app/apps/api
RUN npm install \
    && npx playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

# Setup Playwright service (WORKDIR creates the directory).
# The browser is installed from *this* package's Playwright version so that the
# Chromium build always matches the `playwright` module that server.js loads.
WORKDIR /playwright-service
RUN npm init -y \
    && npm install express playwright \
    && npx playwright install chromium

# Create Playwright service
COPY <<'EOF' /playwright-service/server.js
const express = require('express');
const { chromium } = require('playwright');

const app = express();
app.use(express.json());

// POST /scrape  { "url": "<page to render>" }  ->  { "content": "<html>" }
// The proxy is optional and read from PROXY_SERVER / PROXY_USERNAME /
// PROXY_PASSWORD in the process environment.
app.post('/scrape', async (req, res) => {
  const { url } = req.body || {};
  if (typeof url !== 'string' || url.length === 0) {
    return res.status(400).json({ error: 'Missing "url" in request body' });
  }

  let browser;
  try {
    // Launch inside the try block so a launch failure yields a 500 response
    // instead of an unhandled rejection that leaves the request hanging.
    browser = await chromium.launch({
      proxy: process.env.PROXY_SERVER ? {
        server: process.env.PROXY_SERVER,
        username: process.env.PROXY_USERNAME,
        password: process.env.PROXY_PASSWORD
      } : undefined
    });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle' });
    const content = await page.content();
    res.json({ content });
  } catch (error) {
    res.status(500).json({ error: error.message });
  } finally {
    if (browser) {
      // Best-effort cleanup: the response has already been sent.
      await browser.close().catch(() => {});
    }
  }
});

app.listen(3000, () => console.log('Playwright service on port 3000'));
EOF

# Configure Redis (loopback only; bounded memory with LRU eviction)
RUN sed -i 's/^bind .*/bind 127.0.0.1/' /etc/redis/redis.conf \
    && echo "maxmemory 2gb" >> /etc/redis/redis.conf \
    && echo "maxmemory-policy allkeys-lru" >> /etc/redis/redis.conf

# Supervisor configuration
COPY <<'EOF' /etc/supervisor/conf.d/supervisord.conf
[supervisord]
nodaemon=true
logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid

[program:redis]
; Force foreground mode: supervisord can only manage a process that does not
; daemonize itself, and the distribution's redis.conf may enable daemonize.
command=redis-server /etc/redis/redis.conf --daemonize no
autostart=true
autorestart=true
stdout_logfile=/var/log/redis.log
stderr_logfile=/var/log/redis_error.log

[program:playwright]
; PROXY_SERVER / PROXY_USERNAME / PROXY_PASSWORD are inherited from the
; container environment; credentials are deliberately not written here.
command=node /playwright-service/server.js
directory=/playwright-service
autostart=true
autorestart=true
stdout_logfile=/var/log/playwright.log
stderr_logfile=/var/log/playwright_error.log

[program:firecrawl-api]
command=npm run start:production
directory=/app/apps/api
autostart=true
autorestart=true
environment=NODE_ENV="production",PORT="3002",HOST="0.0.0.0",REDIS_URL="redis://127.0.0.1:6379",PLAYWRIGHT_MICROSERVICE_URL="http://127.0.0.1:3000/scrape"
stdout_logfile=/var/log/firecrawl-api.log
stderr_logfile=/var/log/firecrawl-api_error.log

[program:firecrawl-worker]
; NOTE(review): script name aligned with docker-compose.production.yml, which
; starts the worker with "workers"; confirm against apps/api/package.json.
command=npm run workers
directory=/app/apps/api
autostart=true
autorestart=true
environment=NODE_ENV="production",REDIS_URL="redis://127.0.0.1:6379",PLAYWRIGHT_MICROSERVICE_URL="http://127.0.0.1:3000/scrape"
stdout_logfile=/var/log/firecrawl-worker.log
stderr_logfile=/var/log/firecrawl-worker_error.log
EOF

WORKDIR /app/apps/api

# No `.env` file is copied into the image: it would persist in a layer together
# with whatever secrets it contains. Pass it at runtime with `--env-file`.

# Non-secret runtime defaults (all overridable with `docker run -e ...`)
ENV NODE_ENV=production \
    PORT=3002 \
    HOST=0.0.0.0 \
    USE_DB_AUTHENTICATION=false \
    GEMINI_MODEL=gemini-2.5-flash-latest \
    OLLAMA_BASE_URL=http://host.docker.internal:11434 \
    MODEL_EMBEDDING_NAME=nomic-embed-text \
    AI_STRATEGY=hybrid \
    AI_PRIMARY=gemini \
    AI_EMBEDDINGS=ollama \
    REDIS_URL=redis://127.0.0.1:6379 \
    PLAYWRIGHT_MICROSERVICE_URL=http://127.0.0.1:3000/scrape \
    PROXY_ROTATION_ENABLED=true \
    RATE_LIMIT_ENABLED=true \
    RATE_LIMIT_MAX_REQUESTS=411 \
    RATE_LIMIT_WINDOW_MS=3600000 \
    MAX_CONCURRENT_REQUESTS=25 \
    TIMEOUT_MS=30000

EXPOSE 3002

# Start all services with supervisor
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

Dockerfile.official

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# syntax=docker/dockerfile:1
# Firecrawl Official Docker Build
#
# Two stages: `builder` clones and compiles the API, the final stage carries
# only the built application plus production dependencies.
#
# SECURITY: no credentials are baked into this image. Values set with ENV or
# copied in from a .env file are stored in the image layers and are visible via
# `docker history` / `docker inspect`. Provide them at runtime instead
# (docker-compose `environment:` / `env_file:`, or `docker run --env-file`):
#   GEMINI_API_KEY, OPENAI_API_KEY, TEST_API_KEY, BULL_AUTH_KEY,
#   PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, WEBSHARE_API_KEY
FROM node:20-slim AS builder

# Branch or tag of the Firecrawl repository to build. Override with
# `--build-arg FIRECRAWL_REF=<tag>` for reproducible builds; the default keeps
# the previous behaviour of building the current default branch.
ARG FIRECRAWL_REF=main

WORKDIR /app

# Install build dependencies.
# ca-certificates is listed explicitly because git only *recommends* it and
# recommended packages are no longer installed; the HTTPS clone needs it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        python3 \
    && rm -rf /var/lib/apt/lists/*

# Clone Firecrawl repository (shallow: history is not needed to build)
RUN git clone --depth 1 --branch "${FIRECRAWL_REF}" https://github.com/mendableai/firecrawl.git .

# Build API
WORKDIR /app/apps/api
RUN npm install -g pnpm && pnpm install && pnpm run build

# Production image
FROM node:20-slim

# curl is required by the HEALTHCHECK below
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app/apps/api

# Copy built application
COPY --from=builder /app/apps/api ./

# Install production dependencies only
RUN npm install -g pnpm && pnpm install --prod

# No `.env` file is copied into the image: it would persist in a layer together
# with whatever secrets it contains. Inject it at runtime instead.

# Non-secret runtime defaults (all overridable at `docker run` / compose level)
ENV NODE_ENV=production \
    PORT=3002 \
    HOST=0.0.0.0 \
    USE_DB_AUTHENTICATION=false \
    OPENAI_BASE_URL=https://generativelanguage.googleapis.com/v1beta \
    MODEL_NAME=gemini-2.5-flash-latest \
    OLLAMA_BASE_URL=http://host.docker.internal:11434 \
    MODEL_EMBEDDING_NAME=nomic-embed-text \
    REDIS_URL=redis://redis:6379 \
    REDIS_RATE_LIMIT_URL=redis://redis:6379 \
    PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape \
    MAX_CPU=0.8 \
    MAX_RAM=0.8

EXPOSE 3002

HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -f http://localhost:3002/health || exit 1

CMD ["pnpm", "run", "start:production"]

docker-compose.production.yml

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
name: firecrawl-production

# Secrets are NOT defaulted in this file. Put them in an untracked `.env` file
# next to this compose file (or export them in the shell):
#
#   GEMINI_API_KEY=...        required
#   TEST_API_KEY=...          required
#   BULL_AUTH_KEY=...         required
#   PROXY_SERVER=...          optional
#   PROXY_USERNAME=...        optional
#   PROXY_PASSWORD=...        optional
#
# `${VAR:?message}` makes compose abort with the message when VAR is unset or
# empty; `${VAR:-}` substitutes an empty string when VAR is unset.

services:
  # Redis service
  redis:
    image: redis:7-alpine
    container_name: firecrawl-redis
    command: redis-server --bind 0.0.0.0 --maxmemory 2gb --maxmemory-policy allkeys-lru --appendonly yes
    volumes:
      - redis_data:/data
    networks:
      - firecrawl-network
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  # Playwright service
  playwright-service:
    image: ghcr.io/mendableai/playwright-service:latest
    container_name: firecrawl-playwright
    environment:
      PORT: 3000
      # Optional proxy; left empty when not configured.
      # NOTE(review): assumes the service treats an empty PROXY_SERVER as
      # "no proxy" - confirm against the playwright-service image.
      PROXY_SERVER: ${PROXY_SERVER:-}
      PROXY_USERNAME: ${PROXY_USERNAME:-}
      PROXY_PASSWORD: ${PROXY_PASSWORD:-}
      BLOCK_MEDIA: ${BLOCK_MEDIA:-true}
    networks:
      - firecrawl-network
    restart: unless-stopped

  # Firecrawl API
  api:
    image: ghcr.io/work-flow-manager/firecral-vds:latest
    build:
      context: .
      dockerfile: Dockerfile.official
    container_name: firecrawl-api
    environment:
      # Core settings
      HOST: "0.0.0.0"
      PORT: 3002
      NODE_ENV: production
      USE_DB_AUTHENTICATION: "false"

      # Redis
      REDIS_URL: redis://redis:6379
      REDIS_RATE_LIMIT_URL: redis://redis:6379

      # Playwright
      PLAYWRIGHT_MICROSERVICE_URL: http://playwright-service:3000/scrape

      # AI Configuration - Gemini (served through the OpenAI-compatible endpoint,
      # hence the same key is passed as OPENAI_API_KEY)
      GEMINI_API_KEY: ${GEMINI_API_KEY:?GEMINI_API_KEY must be set}
      OPENAI_API_KEY: ${GEMINI_API_KEY:?GEMINI_API_KEY must be set}
      OPENAI_BASE_URL: https://generativelanguage.googleapis.com/v1beta
      MODEL_NAME: gemini-2.5-flash-latest

      # Ollama for embeddings
      OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
      MODEL_EMBEDDING_NAME: nomic-embed-text

      # Proxy settings (optional)
      PROXY_SERVER: ${PROXY_SERVER:-}
      PROXY_USERNAME: ${PROXY_USERNAME:-}
      PROXY_PASSWORD: ${PROXY_PASSWORD:-}

      # API Keys
      TEST_API_KEY: ${TEST_API_KEY:?TEST_API_KEY must be set}
      BULL_AUTH_KEY: ${BULL_AUTH_KEY:?BULL_AUTH_KEY must be set}

      # Performance
      MAX_CPU: "0.8"
      MAX_RAM: "0.8"

    depends_on:
      # Wait until Redis answers PING (see its healthcheck), not merely until
      # the container has been started.
      redis:
        condition: service_healthy
      playwright-service:
        condition: service_started
    ports:
      - "${PORT:-3002}:3002"
    networks:
      - firecrawl-network
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped
    volumes:
      - firecrawl_data:/app/data
    command: ["pnpm", "run", "start:production"]

  # Firecrawl Worker
  worker:
    image: ghcr.io/work-flow-manager/firecral-vds:latest
    build:
      context: .
      dockerfile: Dockerfile.official
    container_name: firecrawl-worker
    environment:
      # Same as API but for worker
      NODE_ENV: production
      REDIS_URL: redis://redis:6379
      REDIS_RATE_LIMIT_URL: redis://redis:6379
      PLAYWRIGHT_MICROSERVICE_URL: http://playwright-service:3000/scrape

      # AI Configuration
      GEMINI_API_KEY: ${GEMINI_API_KEY:?GEMINI_API_KEY must be set}
      MODEL_NAME: gemini-2.5-flash-latest
      OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
      MODEL_EMBEDDING_NAME: nomic-embed-text

      # Proxy (optional)
      PROXY_SERVER: ${PROXY_SERVER:-}
      PROXY_USERNAME: ${PROXY_USERNAME:-}
      PROXY_PASSWORD: ${PROXY_PASSWORD:-}

      # Performance
      MAX_CPU: "0.8"
      MAX_RAM: "0.8"

    depends_on:
      redis:
        condition: service_healthy
      playwright-service:
        condition: service_started
      api:
        condition: service_started
    networks:
      - firecrawl-network
    extra_hosts:
      - "host.docker.internal:host-gateway"
    restart: unless-stopped
    volumes:
      - firecrawl_data:/app/data
    command: ["pnpm", "run", "workers"]

volumes:
  redis_data:
  firecrawl_data:

networks:
  firecrawl-network:
    driver: bridge

easypanel-simple.json

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
  "name": "firecrawl-complete",
  "description": "Firecrawl All-in-One (Redis + Playwright + API + Workers)",
  "services": [
    {
      "name": "firecrawl-aio",
      "image": "ghcr.io/work-flow-manager/firecral-vds:all-in-one",
      "domains": ["firecrawl.wmappliances.cloud"],
      "ports": ["3002:3002"],
      "environment": {
        "NODE_ENV": "production",
        "PORT": "3002",
        "HOST": "0.0.0.0",
        "TEST_API_KEY": "REPLACE_WITH_TEST_API_KEY",
        "GEMINI_API_KEY": "REPLACE_WITH_GEMINI_API_KEY",
        "OLLAMA_BASE_URL": "http://host.docker.internal:11434"
      },
      "volumes": [
        {
          "name": "firecrawl_data",
          "mountPath": "/app/data"
        }
      ],
      "restart": "unless-stopped",
      "healthCheck": {
        "test": ["CMD", "curl", "-f", "http://localhost:3002/health"],
        "interval": 30,
        "timeout": 10,
        "retries": 3
      }
    }
  ]
}

0 commit comments

Comments
 (0)