-
Notifications
You must be signed in to change notification settings - Fork 0
/
start-cluster.sh
59 lines (51 loc) · 1.95 KB
/
start-cluster.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
# Modify this script by copying it to your home directory
# *** be sure to remove the "exec" line below from your copy! ***
USERSCRIPT=$HOME/start-cluster.sh
# Hand control to the user's own copy when one exists and is executable.
# The -ef test skips the hand-off when that copy is the very file being run,
# so a copy that still contains this stanza falls through instead of
# exec'ing itself in an endless loop.
if [[ -x "$USERSCRIPT" && ! "$USERSCRIPT" -ef "${BASH_SOURCE[0]}" ]]; then
  exec "$USERSCRIPT" "$@"
fi
# From here on, append everything written to stdout or stderr (by this script
# and the commands it runs) to the head-node log.
exec 2>>"$HOME/ray-head.log" 1>&2

# to_int VALUE
# Print VALUE rounded to a whole number; an empty VALUE prints 0.
# LC_ALL=C keeps "." as the decimal separator regardless of the user's locale.
to_int() {
  LC_ALL=C printf '%.0f' "${1:-0}"
}

# Explicit *_GUARANTEE values win; otherwise fall back to the Kubernetes
# limits, and to 0 when those are missing too.
CPU_GUARANTEE=${CPU_GUARANTEE:-${KUBERNETES_LIMIT_CPU:-0}}
MEM_GUARANTEE=${MEM_GUARANTEE:-${KUBERNETES_LIMIT_MEM:-0}}

# Datahub exposes limits/requests as floating point, Ray wants int.
# Shell arithmetic also rejects values such as "3.7", so convert first and
# only then do the division.
MY_CPU_REQUEST=$(to_int "$CPU_GUARANTEE")
MY_MEM_REQUEST=$(to_int "$MEM_GUARANTEE")
# Half of the memory budget goes to the object store.
OBJECT_STORE_MEM=$(( MY_MEM_REQUEST / 2 ))

ray_args=(
  --head
  --port=6380
  "--num-cpus=$MY_CPU_REQUEST"
  --dashboard-host=0.0.0.0
  --object-manager-port=8076
  --node-manager-port=8077
  --dashboard-agent-grpc-port=8078
  --dashboard-agent-listen-port=52365
  --disable-usage-stats
  --object-store-memory "$OBJECT_STORE_MEM"
  --memory "$MY_MEM_REQUEST"
  # Spill objects to the local filesystem under /tmp/spill.
  '--system-config={"object_spilling_config":"{\"type\":\"filesystem\",\"params\":{\"directory_path\":\"/tmp/spill\"}}"}'
)
ray start "${ray_args[@]}"
# Create the headless Service (clusterIP: None) for the Ray head node, unless
# a Service named service-ray-cluster already exists. Any failure of
# "kubectl get" is treated as "does not exist".
if ! kubectl get svc service-ray-cluster >/dev/null 2>&1; then
  # The quoted delimiter sends the manifest verbatim: nothing inside it is
  # subject to shell expansion. YAML nesting is significant, so the
  # indentation below must be preserved.
  # NOTE(review): the selector matches pods labelled "dsmlp/app": spark, not
  # the ray-cluster-head label set on the Service itself; confirm that this
  # is the label the platform puts on the head pod.
  kubectl create -f - <<'EOM'
# Ray head node service, allowing worker pods to discover the head node to perform the bidirectional communication.
# More contexts can be found at [the Ports configurations doc](https://docs.ray.io/en/latest/ray-core/configure.html#ports-configurations).
apiVersion: v1
kind: Service
metadata:
  name: service-ray-cluster
  labels:
    app: ray-cluster-head
spec:
  clusterIP: None
  ports:
    - name: client
      protocol: TCP
      port: 10001
      targetPort: 10001
    - name: dashboard
      protocol: TCP
      port: 8265
      targetPort: 8265
    - name: gcs-server
      protocol: TCP
      port: 6380
      targetPort: 6380
  selector:
    "dsmlp/app": spark
EOM
fi
# Now fire up workers: prefer the user's own start-workers.sh, then the
# system-wide one, passing along this script's arguments.
# Each candidate is checked with -x before exec, because a failed exec makes
# a non-interactive bash exit immediately with an error status; without the
# check the fallback below could never be reached.
for worker_script in "$HOME/start-workers.sh" /opt/ray-support/start-workers.sh; do
  if [ -x "$worker_script" ]; then
    exec "$worker_script" "$@"
  fi
done
# Execution only reaches here when neither start-workers script is available;
# assume that's intentional, but leave a trace for whoever reads the output.
echo "start-cluster: no executable start-workers.sh found; not starting workers" >&2
exit 0