#!/bin/bash
#
# start-cluster.sh — install the per-user Spark cluster Helm chart
# (/opt/spark), wiring the user's NFS home, the shared course workspace,
# and the datasets export into both the master and worker pods.
#
# Required env:  SPARK_CHART_NAME  helm release name
#                USER              login name (used for mount paths)
#                UID               bash builtin, used for runAsUser
# Optional env:  SPARK_CLUSTER_*   image, replica, cpu/mem, uid/gid overrides
#                                  (defaults inline below)
#
# NOTE(review): the log messages below say "preStop hook", but this script
# *starts* the cluster — confirm the intended wording with the author.

set -euxo pipefail

# Duplicate all stderr — which, because of `set -x`, includes the full
# execution trace — into a timestamped log file for debugging.
LOG_FILE="/tmp/prestop_$(date +'%Y%m%d_%H%M%S').log"
exec 2> >(tee -a "$LOG_FILE")

echo "Running preStop hook script" | tee -a "$LOG_FILE"

# Resolve the NFS backing of the user's home directory. findmnt prints
# SOURCE as "server:/export/path"; split it on the first colon.
# The command substitution and the --target path are quoted so the values
# are never word-split or glob-expanded.
IFS=: read -r FILESYSTEM HOMEMOUNT <<< "$(findmnt -n -o SOURCE --target "/home/$USER")" || {
  echo "Error: Failed to find the filesystem and home mount." >&2
  exit 1
}

# The course workspace export is two directory levels above the user's
# home export (e.g. /export/workspaces/<course>/home/<user> -> /export/workspaces/<course>).
WORKSPACE=$(dirname "$HOMEMOUNT") || {
  echo "Error: Failed to determine the workspace from home mount." >&2
  exit 1
}
WORKSPACE=$(dirname "$WORKSPACE") || {
  echo "Error: Failed to determine the workspace directory." >&2
  exit 1
}

echo "Filesystem: $FILESYSTEM, Workspace: $WORKSPACE" | tee -a "$LOG_FILE"

# The single-quoted --set-json values deliberately leave ${USER} unexpanded:
# they only establish each mount's name/structure, and every mountPath (and
# subPath) is immediately overridden by the plain --set line that follows it.
helm install "$SPARK_CHART_NAME" /opt/spark \
  --debug \
  --set image.registry="${SPARK_CLUSTER_IMAGE_REGISTRY:-ghcr.io}" \
  --set image.repository="${SPARK_CLUSTER_IMAGE_REPO:-ucsd-ets/spark-node}" \
  --set image.tag="${SPARK_CLUSTER_IMAGE_TAG:-fa22-3}" \
  --set image.pullPolicy=Always \
  --set serviceAccount.name=default \
  --set serviceAccount.create=false \
  --set master.podSecurityContext.runAsUser="$UID" \
  --set master.containerSecurityContext.runAsUser="$UID" \
  --set worker.replicaCount="${SPARK_CLUSTER_REPLICAS:-3}" \
  --set worker.podSecurityContext.runAsUser="$UID" \
  --set worker.containerSecurityContext.runAsUser="$UID" \
  --set master.podSecurityContext.runAsGroup="${SPARK_CLUSTER_RUNASGROUP:-0}" \
  --set master.podSecurityContext.fsGroup="${SPARK_CLUSTER_FSGROUP:-0}" \
  --set worker.podSecurityContext.runAsGroup="${SPARK_CLUSTER_RUNASGROUP:-0}" \
  --set worker.podSecurityContext.fsGroup="${SPARK_CLUSTER_FSGROUP:-0}" \
  --set worker.resources.requests.memory="${SPARK_CLUSTER_WORKER_MEM:-20G}" \
  --set worker.resources.limits.memory="${SPARK_CLUSTER_WORKER_MEM:-20G}" \
  --set worker.coreLimit="${SPARK_CLUSTER_WORKER_CPU:-2}" \
  --set worker.resources.limits.cpu="${SPARK_CLUSTER_WORKER_CPU:-2}" \
  --set worker.resources.requests.cpu="${SPARK_CLUSTER_WORKER_CPU:-2}" \
  --set master.resources.limits.cpu="${SPARK_CLUSTER_MASTER_CPU:-2}" \
  --set master.resources.requests.cpu="${SPARK_CLUSTER_MASTER_CPU:-2}" \
  --set master.resources.limits.memory="${SPARK_CLUSTER_MASTER_MEM:-8G}" \
  --set master.resources.requests.memory="${SPARK_CLUSTER_MASTER_MEM:-8G}" \
  --set master.memoryLimit="${SPARK_CLUSTER_MASTER_MEM:-8G}" \
  --set worker.memoryLimit="${SPARK_CLUSTER_WORKER_APP_MEM:-18G}" \
  --set-json="worker.extraVolumes[0]={\"name\":\"course-workspace\",\"nfs\":{\"server\":\"${FILESYSTEM}\",\"path\":\"${WORKSPACE}\"}}" \
  --set-json='worker.extraVolumes[1]={"name":"home","persistentVolumeClaim":{"claimName":"home"}}' \
  --set-json="worker.extraVolumes[2]={\"name\":\"datasets\",\"nfs\":{\"server\":\"its-dsmlp-fs01.ucsd.edu\",\"path\":\"/export/datasets\"}}" \
  --set-json='worker.extraVolumeMounts[0]={"name":"course-workspace","mountPath":"/home/${USER}"}' \
  --set worker.extraVolumeMounts[0].mountPath="/home/$USER" \
  --set worker.extraVolumeMounts[0].subPath="home/$USER" \
  --set-json='worker.extraVolumeMounts[1]={"name":"course-workspace","mountPath":"/home/${USER}/public"}' \
  --set worker.extraVolumeMounts[1].mountPath="/home/$USER/public" \
  --set worker.extraVolumeMounts[1].subPath="public" \
  --set-json='worker.extraVolumeMounts[2]={"name":"home","mountPath":"/home/${USER}/private"}' \
  --set worker.extraVolumeMounts[2].mountPath="/home/$USER/private" \
  --set-json='worker.extraVolumeMounts[3]={"name":"datasets","mountPath":"/datasets"}' \
  --set-json="master.extraVolumes[0]={\"name\":\"course-workspace\",\"nfs\":{\"server\":\"${FILESYSTEM}\",\"path\":\"${WORKSPACE}\"}}" \
  --set-json='master.extraVolumes[1]={"name":"home","persistentVolumeClaim":{"claimName":"home"}}' \
  --set-json="master.extraVolumes[2]={\"name\":\"datasets\",\"nfs\":{\"server\":\"its-dsmlp-fs01.ucsd.edu\",\"path\":\"/export/datasets\"}}" \
  --set-json='master.extraVolumeMounts[0]={"name":"course-workspace","mountPath":"/home/${USER}"}' \
  --set master.extraVolumeMounts[0].mountPath="/home/$USER" \
  --set master.extraVolumeMounts[0].subPath="home/$USER" \
  --set-json='master.extraVolumeMounts[1]={"name":"course-workspace","mountPath":"/home/${USER}/public"}' \
  --set master.extraVolumeMounts[1].mountPath="/home/$USER/public" \
  --set master.extraVolumeMounts[1].subPath="public" \
  --set-json='master.extraVolumeMounts[2]={"name":"home","mountPath":"/home/${USER}/private"}' \
  --set-json='master.extraVolumeMounts[3]={"name":"datasets","mountPath":"/datasets"}' \
  --set master.extraVolumeMounts[2].mountPath="/home/$USER/private" || {
    echo "Error: Helm installation failed." >&2
    exit 1
  }
# NOTE(review): master.extraVolumeMounts[3] (the /datasets mount) was dropped
# by the "add debug" patch while the worker kept its [3] mount; restored above
# for master/worker parity — confirm it was not removed intentionally.

echo "preStop hook script completed successfully."