Skip to content

Commit 8db5ff1

Browse files
authored
chore(ci): fix error handling & add timeout (#3835)
* chore(ci): fix error handling
* add timeout for log collection
  Sometimes the log collection gets stuck.
* Fix cleanup to run only once at the exit
* Retest
* Retest
* Add AI rule for `set`
1 parent d25e0b3 commit 8db5ff1

File tree

5 files changed

+30
-11
lines changed

5 files changed

+30
-11
lines changed

.claude/memories/ci-e2e-testing.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,13 @@ yarn prettier:fix # Fix formatting for shell, markdown, and YAML file
323323

324324
**Purpose**: This package provides essential tooling for maintaining code quality in the CI infrastructure, ensuring consistent formatting and shell script best practices across the pipeline scripts.
325325

326+
#### Shell Script Conventions
327+
328+
**Shell scripts in `.ibm/` folder:**
329+
- **Never use** `set pipefail` or `set -o pipefail`
330+
- Only `.ibm/pipelines/openshift-ci-tests.sh` defines global `set` options; other scripts inherit them
331+
- Functions may temporarily disable and re-enable error handling using the `set +e` / `set -e` pattern
332+
326333
#### Job Handlers
327334
The main script handles different job types:
328335
- `handle_aks_helm`: AKS Helm deployment

.cursor/rules/ci-e2e-testing.mdc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,13 @@ yarn prettier:fix # Fix formatting for shell, markdown, and YAML file
329329

330330
**Purpose**: This package provides essential tooling for maintaining code quality in the CI infrastructure, ensuring consistent formatting and shell script best practices across the pipeline scripts.
331331

332+
#### Shell Script Conventions
333+
334+
**Shell scripts in `.ibm/` folder:**
335+
- **Never use** `set pipefail` or `set -o pipefail`
336+
- Only `.ibm/pipelines/openshift-ci-tests.sh` defines global `set` options; other scripts inherit them
337+
- Functions may temporarily disable and re-enable error handling using the `set +e` / `set -e` pattern
338+
332339
#### Job Handlers
333340
The main script handles different job types:
334341
- `handle_aks_helm`: AKS Helm deployment

.ibm/pipelines/openshift-ci-tests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ save_container_platform "${CONTAINER_PLATFORM}" "${CONTAINER_PLATFORM_VERSION}"
3535
# Define a cleanup function to be executed upon script exit.
3636
# shellcheck source=.ibm/pipelines/cleanup.sh
3737
source "${DIR}/cleanup.sh"
38-
trap cleanup EXIT INT ERR
38+
trap cleanup EXIT
3939

4040
log::debug "Sourcing utils.sh"
4141
# shellcheck source=.ibm/pipelines/utils.sh

.ibm/pipelines/utils.sh

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@ retrieve_pod_logs() {
99
local pod_name=$1
1010
local container=$2
1111
local namespace=$3
12+
local log_timeout=${4:-30} # Default timeout: 30 seconds
1213
log::debug "Retrieving logs for container: $container"
13-
# Save logs for the current and previous container
14-
kubectl logs $pod_name -c $container -n $namespace > "pod_logs/${pod_name}_${container}.log" || { log::warn "logs for container $container not found"; }
15-
kubectl logs $pod_name -c $container -n $namespace --previous > "pod_logs/${pod_name}_${container}-previous.log" 2> /dev/null || {
16-
log::debug "Previous logs for container $container not found"
14+
# Save logs for the current and previous container with timeout to prevent hanging
15+
timeout "${log_timeout}" kubectl logs "$pod_name" -c "$container" -n "$namespace" > "pod_logs/${pod_name}_${container}.log" 2> /dev/null || { log::warn "logs for container $container not found or timed out"; }
16+
timeout "${log_timeout}" kubectl logs "$pod_name" -c "$container" -n "$namespace" --previous > "pod_logs/${pod_name}_${container}-previous.log" 2> /dev/null || {
17+
log::debug "Previous logs for container $container not found or timed out"
1718
rm -f "pod_logs/${pod_name}_${container}-previous.log"
1819
}
1920
}
@@ -459,8 +460,6 @@ delete_namespace() {
459460
}
460461

461462
configure_external_postgres_db() {
462-
set -euo pipefail # Enable strict error handling
463-
464463
local project=$1
465464
local max_attempts=60 # 5 minutes total (60 attempts × 5 seconds)
466465
local wait_interval=5
@@ -702,7 +701,6 @@ run_tests() {
702701
e2e_tests_dir=$(pwd)
703702

704703
yarn install --immutable > /tmp/yarn.install.log.txt 2>&1
705-
706704
INSTALL_STATUS=$?
707705
if [ $INSTALL_STATUS -ne 0 ]; then
708706
echo "=== YARN INSTALL FAILED ==="
@@ -726,7 +724,7 @@ run_tests() {
726724

727725
local RESULT=${PIPESTATUS[0]}
728726

729-
pkill Xvfb
727+
pkill Xvfb || true
730728

731729
# Use namespace for artifact directory to keep artifacts organized by deployment
732730
mkdir -p "${ARTIFACT_DIR}/${namespace}/test-results"
@@ -1167,14 +1165,14 @@ check_and_test() {
11671165
oc get pods -n "${namespace}"
11681166
run_tests "${release_name}" "${namespace}" "${playwright_project}" "${url}"
11691167
else
1170-
echo "Backstage is not running. Exiting..."
1168+
echo "Backstage is not running. Marking deployment as failed and continuing..."
11711169
CURRENT_DEPLOYMENT=$((CURRENT_DEPLOYMENT + 1))
11721170
save_status_deployment_namespace $CURRENT_DEPLOYMENT "$namespace"
11731171
save_status_failed_to_deploy $CURRENT_DEPLOYMENT true
11741172
save_status_test_failed $CURRENT_DEPLOYMENT true
11751173
save_overall_result 1
11761174
fi
1177-
save_all_pod_logs $namespace
1175+
save_all_pod_logs "$namespace"
11781176
}
11791177

11801178
check_upgrade_and_test() {

.rulesync/rules/ci-e2e-testing.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,13 @@ yarn prettier:fix # Fix formatting for shell, markdown, and YAML file
340340

341341
**Purpose**: This package provides essential tooling for maintaining code quality in the CI infrastructure, ensuring consistent formatting and shell script best practices across the pipeline scripts.
342342

343+
#### Shell Script Conventions
344+
345+
**Shell scripts in `.ibm/` folder:**
346+
- **Never use** `set pipefail` or `set -o pipefail`
347+
- Only `.ibm/pipelines/openshift-ci-tests.sh` defines global `set` options; other scripts inherit them
348+
- Functions may temporarily disable and re-enable error handling using the `set +e` / `set -e` pattern
349+
343350
#### Job Handlers
344351
The main script handles different job types:
345352
- `handle_aks_helm`: AKS Helm deployment

0 commit comments

Comments
 (0)