Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow min disk before clean to be customized #639

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 41 additions & 16 deletions packer/linux/conf/bin/bk-check-disk-space.sh
Original file line number Diff line number Diff line change
@@ -1,25 +1,50 @@
#!/bin/bash
set -euo pipefail

DISK_MIN_AVAILABLE=${DISK_MIN_AVAILABLE:-5242880} # 5GB
DISK_MIN_INODES=${DISK_MIN_INODES:-250000} # docker needs lots

DOCKER_DIR="/var/lib/docker/"

disk_avail=$(df -k --output=avail "$DOCKER_DIR" | tail -n1)

echo "Disk space free: $(df -k -h --output=avail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')"

if [[ $disk_avail -lt $DISK_MIN_AVAILABLE ]]; then
echo "Not enough disk space free, cutoff is ${DISK_MIN_AVAILABLE} 🚨" >&2
exit 1
# Usage:
# bk-check-disk-space.sh [min disk required] [min inodes required]
# min disk required can be either an amount of bytes, a pattern like 10G
# or 500M, or a percentage like 5%; defaults to 5G
# min inodes required must be a number; defaults to 250,000

# Pull in the dehumanize helper that sits next to this script
source "$(dirname "$0")/dehumanize.sh"

# Cutoff comes from the first argument, falling back to 5G
min_available="${1:-5G}"
docker_dir="/var/lib/docker/"

# First check the disk available

# Each figure is taken from the last line of df output for docker_dir
disk_avail="$(df -k --output=avail "$docker_dir" | tail -n1)"
disk_avail_human="$(df -k -h --output=avail "$docker_dir" | tail -n1 | tr -d '[:space:]')"
disk_used_pct="$(df -k --output=pcent "$docker_dir" | tail -n1 | tr -d '[:space:]%')"
disk_free_pct="$(( 100 - disk_used_pct ))"

printf 'Disk space free: %s (%s%%)\n' "$disk_avail_human" "$disk_free_pct"

# Compare the free space against the requested cutoff and exit 1 when it is
# not met. A cutoff ending in % is compared with the free percentage; any
# other cutoff is converted to bytes by dehumanize.
if [[ $min_available =~ \%$ ]] ; then
  # bc prints 1 when the comparison is true
  if [[ $(echo "${disk_free_pct}<${min_available}" | sed 's/%//g' | bc) -gt 0 ]] ; then
    echo "Not enough disk space free, cutoff is ${min_available} 🚨" >&2
    exit 1
  fi
else
  # disk_avail comes from `df -k` (1K blocks) while dehumanize returns bytes,
  # so scale the df figure to bytes before comparing the two.
  if [[ $((disk_avail * 1024)) -lt $(dehumanize "$min_available") ]]; then
    echo "Not enough disk space free, cutoff is ${min_available} 🚨" >&2
    exit 1
  fi
fi

inodes_avail=$(df -k --output=iavail "$DOCKER_DIR" | tail -n1)
# Next check inodes, these can be exhausted by docker build operations

# Second argument overrides the inode cutoff; each figure below is taken from
# the last line of df output for docker_dir
inodes_min_available="${2:-250000}"
inodes_avail="$(df -k --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]')"
inodes_avail_human="$(df -k -h --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]')"
inodes_used_pct="$(df -k --output=ipcent "$docker_dir" | tail -n1 | tr -d '[:space:]%')"
inodes_free_pct="$(( 100 - inodes_used_pct ))"

echo "Inodes free: $(df -k -h --output=iavail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')"
printf "Inodes free: %s (%s%%)\\n" "$inodes_avail_human" "$inodes_free_pct"

if [[ $inodes_avail -lt $DISK_MIN_INODES ]]; then
echo "Not enough inodes free, cutoff is ${DISK_MIN_INODES} 🚨" >&2
if [[ $inodes_avail -lt $inodes_min_available ]]; then
echo "Not enough inodes free, cutoff is ${inodes_min_available} 🚨" >&2
exit 1
fi
6 changes: 6 additions & 0 deletions packer/linux/conf/bin/bk-install-elastic-stack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ export PLUGINS_ENABLED="${PLUGINS_ENABLED[*]-}"
export BUILDKITE_ECR_POLICY=${BUILDKITE_ECR_POLICY:-none}
EOF

# cron-env is sourced by crontab entries and low disk scripts.
# Each value is written with printf %q so that it is read back unchanged when
# the file is sourced, even if it contains spaces or shell metacharacters.
{
  printf 'export DISK_MIN_AVAILABLE=%q\n' "$DISK_MIN_AVAILABLE"
  printf 'export DOCKER_PRUNE_UNTIL=%q\n' "$DOCKER_PRUNE_UNTIL"
} > /var/lib/buildkite-agent/cron-env

if [[ "${BUILDKITE_AGENT_RELEASE}" == "edge" ]] ; then
echo "Downloading buildkite-agent edge..."
curl -Lsf -o /usr/bin/buildkite-agent-edge \
Expand Down
39 changes: 39 additions & 0 deletions packer/linux/conf/bin/dehumanize-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
set -o pipefail

. "$(dirname "$0")"/dehumanize.sh

# A value with no unit suffix is expected to come back unchanged.
test_without_unit(){
  # Quoted so an empty result still reaches assertEquals as an argument
  assertEquals 45 "$(dehumanize 45)"
}

# A "b"/"B" suffix is expected to leave the number unchanged.
test_bytes(){
  # Quoted so an empty result still reaches assertEquals as an argument
  assertEquals 45 "$(dehumanize 45b)"
  assertEquals 45 "$(dehumanize 45B)"
}

# 45 kilobytes is expected to be 45 * 1024 bytes, in either letter case.
test_kilobytes(){
  # Quoted so an empty result still reaches assertEquals as an argument
  assertEquals 46080 "$(dehumanize 45kb)"
  assertEquals 46080 "$(dehumanize 45KB)"
}

# 45 megabytes is expected to be 45 * 1024^2 bytes, in either letter case.
test_megabytes(){
  # Quoted so an empty result still reaches assertEquals as an argument
  assertEquals 47185920 "$(dehumanize 45mb)"
  assertEquals 47185920 "$(dehumanize 45MB)"
}

# 45 gigabytes is expected to be 45 * 1024^3 bytes, in either letter case.
test_gigabytes(){
  # Quoted so an empty result still reaches assertEquals as an argument
  assertEquals 48318382080 "$(dehumanize 45gb)"
  assertEquals 48318382080 "$(dehumanize 45GB)"
}

# 45 terabytes is expected to be 45 * 1024^4 bytes, in either letter case.
test_terabytes(){
  # Quoted so an empty result still reaches assertEquals as an argument
  assertEquals 49478023249920 "$(dehumanize 45tb)"
  assertEquals 49478023249920 "$(dehumanize 45TB)"
}

# Fractional amounts are accepted: 1.5 gigabytes is 1.5 * 1024^3 bytes.
test_using_decimals(){
  # Quoted so an empty result still reaches assertEquals as an argument
  assertEquals 1610612736 "$(dehumanize 1.5gb)"
}

. shunit2
10 changes: 10 additions & 0 deletions packer/linux/conf/bin/dehumanize.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Converts human-readable sizes like 1.43K and 120.3M to bytes.
#
# Arguments: $1 - a non-negative number with an optional unit suffix
#                 (b, k/kb, m/mb, g/gb, t/tb; any letter case; units are
#                 powers of 1024). A bare number is taken as bytes.
# Outputs:   the size in whole bytes (fraction truncated) on stdout
# Returns:   0 on success; 1, with a message on stderr and nothing on stdout,
#            when $1 is not a recognised size (for example "" or "5%")
dehumanize() {
  local input="${1:-}"

  if ! awk '
    BEGIN { invalid = 0 }
    {
      size = tolower($0)
      gsub(/[ \t\r]/, "", size)

      # Match the whole value so junk such as "abc5" or "5%" is rejected
      # instead of being converted to a misleading number
      if (size !~ /^([0-9]+\.?[0-9]*|\.[0-9]+)[kmgt]?b?$/) {
        invalid = 1
        exit
      }

      # What is left after dropping the digits is the unit suffix
      unit = size
      gsub(/[0-9.]/, "", unit)

      factor = 1
      if (unit ~ /^k/) factor = 1024
      else if (unit ~ /^m/) factor = 1024 * 1024
      else if (unit ~ /^g/) factor = 1024 * 1024 * 1024
      else if (unit ~ /^t/) factor = 1024 * 1024 * 1024 * 1024

      # size still carries its suffix; awk converts the leading numeric part.
      # int() truncates, and %.0f prints the result without depending on how
      # wide an integer the awk implementation uses for %u/%d.
      printf "%.0f\n", int(size * factor)
    }
    END { exit invalid }
  ' <<< "$input"; then
    printf 'dehumanize: unrecognised size: %s\n' "$input" >&2
    return 1
  fi
}
9 changes: 7 additions & 2 deletions packer/linux/conf/buildkite-agent/hooks/environment
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ source ~/cfn-env
echo "~~~ :llama: Setting up elastic stack environment ($BUILDKITE_STACK_VERSION)"
cat ~/cfn-env

if [[ -f ~/cron-env ]] ; then
# shellcheck source=/dev/null
source ~/cron-env
fi

echo "Checking docker"
if ! docker ps ; then
echo "^^^ +++"
Expand All @@ -17,13 +22,13 @@ if ! docker ps ; then
fi

echo "Checking disk space"
if ! /usr/local/bin/bk-check-disk-space.sh ; then
if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then

echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL:-4h}"
docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}"

echo "Checking disk space again"
if ! /usr/local/bin/bk-check-disk-space.sh ; then
if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}"; then
echo "Disk health checks failed" >&2
exit 1
fi
Expand Down
12 changes: 9 additions & 3 deletions packer/linux/conf/docker/cron.hourly/docker-gc
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@ if [[ $EUID -eq 0 ]]; then
exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log
fi

DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-4h}
# Load config from file if it exists
if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then
  # shellcheck source=/dev/null
  source /var/lib/buildkite-agent/cron-env
fi

# Fall back to 4h whenever the value is still unset or empty, which also
# covers a cron-env file that does not provide it
DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-4h}

## ------------------------------------------
## Prune stuff that doesn't affect cache hits

docker network prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"
docker container prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"
# Expand the value itself: the ${!NAME} form is indirect expansion and would
# look up a variable named after the value (e.g. "4h") instead.
# The :-4h fallback keeps the filter valid if the value is unset or empty.
docker network prune --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}"
docker container prune --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}"
16 changes: 10 additions & 6 deletions packer/linux/conf/docker/cron.hourly/docker-low-disk-gc
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ if [[ $EUID -eq 0 ]]; then
exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log
fi

DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-1h}

mark_instance_unhealthy() {
# cancel any running buildkite builds
killall -QUIT buildkite-agent || true
Expand All @@ -19,14 +17,20 @@ mark_instance_unhealthy() {

trap mark_instance_unhealthy ERR

# Load config from file if it exists
if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then
# shellcheck source=/dev/null
source /var/lib/buildkite-agent/cron-env
fi

## -----------------------------------------------------------------
## Check disk, we only want to prune images/containers if we have to

if ! /usr/local/bin/bk-check-disk-space.sh ; then
echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL}"
docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL}"
if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then
echo "Cleaning up docker resources older than 1h"
docker image prune --all --force --filter "until=1h"

if ! /usr/local/bin/bk-check-disk-space.sh ; then
if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then
echo "Disk health checks failed" >&2
exit 1
fi
Expand Down
12 changes: 12 additions & 0 deletions templates/aws-stack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,16 @@ Parameters:
- "false"
Default: "false"

MinimumDiskAvailableBeforeCleanup:
Type: String
Description: Either a percentage (%) or absolute unit (B, MB, GB) of disk below which disk cleanup is run
Default: "2GB"

DockerPruneUntil:
Type: String
Description: How far back to prune docker networks images and containers on hourly cleanup
Default: "4h"

Outputs:
VpcId:
Value:
Expand Down Expand Up @@ -857,6 +867,8 @@ Resources:
BUILDKITE_ECR_POLICY=${ECRAccessPolicy} \
BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB=${BuildkiteTerminateInstanceAfterJob} \
BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS=${BuildkiteAdditionalSudoPermissions} \
DISK_MIN_AVAILABLE="${MinimumDiskAvailableBeforeCleanup}" \
DOCKER_PRUNE_UNTIL="${DockerPruneUntil}" \
AWS_DEFAULT_REGION=${AWS::Region} \
SECRETS_PLUGIN_ENABLED=${EnableSecretsPlugin} \
ECR_PLUGIN_ENABLED=${EnableECRPlugin} \
Expand Down