# docker-compose.yml
# Compose file format version declared for this stack.
version: "3"
# Containers that make up the stack: nessie, minio, spark and dremio.
services:
# Nessie catalog server; its API is published on host port 19120.
# NOTE(review): the QUARKUS_DATASOURCE_* values below name RocksDB, but Nessie
# normally selects its version store through NESSIE_VERSION_STORE_TYPE, so the
# in-memory default may actually be in effect. Confirm before relying on
# ./nessie-data for persistence.
  nessie:
    container_name: nessie
    image: projectnessie/nessie:latest
    ports:
      - "19120:19120"  # Nessie API port
    volumes:
      # Local directory mounted at the path the RocksDB URL below points to
      - ./nessie-data:/nessie/data
    environment:
      QUARKUS_PROFILE: prod
      QUARKUS_HTTP_PORT: "19120"
      QUARKUS_LOG_CONSOLE_FORMAT: '%d{yyyy-MM-dd HH:mm:ss} %-5p [%c{1.}] (%t) %s%e%n'
      QUARKUS_LOG_LEVEL: INFO
      QUARKUS_DATASOURCE_DB_KIND: rocksdb
      QUARKUS_DATASOURCE_JDBC_URL: 'jdbc:rocksdb:file:///nessie/data'
      QUARKUS_DATASOURCE_USERNAME: nessie
      QUARKUS_DATASOURCE_PASSWORD: nessie
    networks:
      - intro-network
# MinIO object storage: S3 API on port 9000, web console on port 9001.
# On start-up the entrypoint launches the server, creates the datalake,
# datalakehouse, warehouse and seed buckets, and copies the files mounted at
# /minio-data into the seed bucket.
  minio:
    image: minio/minio
    container_name: minio
    environment:
      - MINIO_ROOT_USER=admin
      - MINIO_ROOT_PASSWORD=password
      - MINIO_DOMAIN=minio
      - MINIO_REGION_NAME=us-east-1
      - MINIO_REGION=us-east-1
    ports:
      - "9000:9000"
      - "9001:9001"
    healthcheck:
      # Probe with mc, which the entrypoint already depends on, instead of
      # curl, which is not guaranteed to be present in the image.
      test: ["CMD", "mc", "ready", "local"]
      interval: 30s
      timeout: 20s
      retries: 3
    volumes:
      # Local seed files; the server itself stores its data under /data
      - ./minio-data:/minio-data
    # List form avoids relying on how Compose splits a quoted shell string.
    entrypoint:
      - /bin/sh
      - "-c"
      - |
        minio server /data --console-address ':9001' &
        # Retry until the server accepts connections instead of a fixed sleep.
        until mc alias set myminio http://localhost:9000 admin password; do
          sleep 1
        done
        # --ignore-existing keeps container restarts from failing on mb.
        mc mb --ignore-existing myminio/datalake
        mc mb --ignore-existing myminio/datalakehouse
        mc mb --ignore-existing myminio/warehouse
        mc mb --ignore-existing myminio/seed
        mc cp /minio-data/* myminio/seed/
        # Block on the server process so the container exits if MinIO dies.
        wait
    networks:
      intro-network:
# Spark standalone master + worker, Spark History Server and JupyterLab in a
# single container. The entrypoint starts them in that order; JupyterLab runs
# in the foreground and keeps the container alive.
  spark:
    platform: linux/x86_64
    image: alexmerced/spark35nb:latest
    container_name: spark
    ports:
      - "8080:8080"  # Master Web UI
      - "7077:7077"  # Master port for job submissions
      - "8081:8081"  # Worker Web UI
      - "4040-4045:4040-4045"  # Additional Spark job UI ports for more jobs
      - "18080:18080"  # Spark History Server
      - "8888:8888"  # Jupyter Notebook
    environment:
      - AWS_REGION=us-east-1
      - AWS_ACCESS_KEY_ID=admin  # Minio username
      - AWS_SECRET_ACCESS_KEY=password  # Minio password
      - SPARK_MASTER_HOST=spark
      - SPARK_MASTER_PORT=7077
      - SPARK_MASTER_WEBUI_PORT=8080
      - SPARK_WORKER_WEBUI_PORT=8081
      - SPARK_HISTORY_OPTS=-Dspark.history.fs.logDirectory=/tmp/spark-events
      - SPARK_HOME=/opt/spark  # Set SPARK_HOME explicitly
    volumes:
      # Volume for seeding data into the container
      - ./notebook-seed:/workspace/seed-data
    # List form with a literal block: a trailing && continues the command on
    # the next line, so no backslash continuations are needed.
    entrypoint:
      - /bin/bash
      - "-c"
      - |
        /opt/spark/sbin/start-master.sh &&
        # The master is bound to SPARK_MASTER_HOST (spark), so the worker must
        # register with that name rather than localhost.
        /opt/spark/sbin/start-worker.sh spark://spark:7077 &&
        # Log directory referenced by SPARK_HISTORY_OPTS must exist first.
        mkdir -p /tmp/spark-events &&
        /opt/spark/sbin/start-history-server.sh &&
        jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password='' &&
        tail -f /dev/null
    networks:
      intro-network:
# Dremio (dremio-oss image); ports 9047, 31010, 32010 and 45678 are published
# on the host under the same numbers.
  dremio:
    container_name: dremio
    image: dremio/dremio-oss:latest
    platform: linux/x86_64
    environment:
      DREMIO_JAVA_SERVER_EXTRA_OPTS: '-Dpaths.dist=file:///opt/dremio/data/dist'
    ports:
      - "9047:9047"
      - "31010:31010"
      - "32010:32010"
      - "45678:45678"
    networks:
      - intro-network
# Network that the nessie, minio, spark and dremio services all attach to;
# declared with default settings.
networks:
  intro-network: {}