Skip to content

Commit c26bd68

Browse files
committed
Resolved comment
1 parent b6e2293 commit c26bd68

File tree

4 files changed

+79
-18
lines changed

4 files changed

+79
-18
lines changed

perf-benchmarking-for-releases/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ Before running the benchmarks, ensure you have:
4949
The main script to run the benchmarks is `run-benchmarks.sh`.
5050
It should be executed from the `perf-benchmarking-for-releases` directory.
5151

52+
**Note:** This framework currently only supports benchmarking against regional GCS buckets with a flat object namespace (i.e., non-hierarchical).
53+
5254
### Syntax
5355

5456
```bash
@@ -62,13 +64,13 @@ bash run-benchmarks.sh <GCSFUSE_VERSION> <PROJECT_ID> <REGION> <MACHINE_TYPE> <I
6264
- `<REGION>`: The GCP region where the VM and GCS buckets will be created (e.g., `us-south1`).
6365
- `<MACHINE_TYPE>`: The GCE machine type for the benchmark VM (e.g., `n2-standard-96`). This script supports attaching 16 local NVMe SSDs (375GB each) for LSSD-supported machine types.
6466
- **Note:** If your machine type supports LSSD but is not included in the `LSSD_SUPPORTED_MACHINES` array within `run-benchmarks.sh` script, you may need to manually add it to ensure LSSDs are attached.
65-
- `<IMAGE_FAMILY>`: The image family for the VM (e.g., `ubuntu-2204-lts`).
67+
- `<IMAGE_FAMILY>`: The image family for the VM (e.g., `ubuntu-2504-amd64`).
6668
- `<IMAGE_PROJECT>`: The image project for the VM (e.g., `ubuntu-os-cloud`).
6769

6870
### Example:
6971

7072
```bash
71-
bash run-benchmarks.sh v2.12.0 gcs-fuse-test us-south1 n2-standard-96 ubuntu-2204-lts ubuntu-os-cloud
73+
bash run-benchmarks.sh master gcs-fuse-test us-south1 n2-standard-96 ubuntu-2504-amd64 ubuntu-os-cloud
7274
```
7375

7476
---

perf-benchmarking-for-releases/run-benchmarks.sh

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ if [ "$#" -ne 6 ]; then
2222
echo "This script should be run from the 'perf-benchmarking-for-releases' directory."
2323
echo ""
2424
echo "Example:"
25-
echo " bash run-benchmarks.sh v2.12.0 gcs-fuse-test us-south1 n2-standard-96 ubuntu-2204-lts ubuntu-os-cloud"
25+
echo " bash run-benchmarks.sh master gcs-fuse-test us-south1 n2-standard-96 ubuntu-2504-amd64 ubuntu-os-cloud"
2626
exit 1
2727
fi
2828

@@ -50,6 +50,7 @@ UNIQUE_ID="${TIMESTAMP}-${RAND_SUFFIX}"
5050
VM_NAME="gcsfuse-perf-benchmark-${UNIQUE_ID}"
5151
GCS_BUCKET_WITH_FIO_TEST_DATA="gcsfuse-release-benchmark-data-${UNIQUE_ID}"
5252
RESULTS_BUCKET_NAME="gcsfuse-release-benchmarks-results"
53+
BQ_TABLE="gcs-fuse-test-ml.gke_test_tool_outputs.fio_outputs"
5354
RESULT_PATH="gs://${RESULTS_BUCKET_NAME}/${GCSFUSE_VERSION}-${UNIQUE_ID}"
5455

5556

@@ -108,8 +109,8 @@ echo "Creating GCS test data bucket: gs://${GCS_BUCKET_WITH_FIO_TEST_DATA} in re
108109
gcloud storage buckets create "gs://${GCS_BUCKET_WITH_FIO_TEST_DATA}" --project="${PROJECT_ID}" --location="${REGION}"
109110

110111
# Upload FIO job files to the results bucket for the VM to download
111-
echo "Uploading all .fio job files from local 'fio-job-files/' directory to ${RESULT_PATH}/fio_job_files/..."
112-
gcloud storage cp fio_job_files/*.fio "${RESULT_PATH}/fio-job-files/"
112+
echo "Uploading all .fio job files from local 'fio-job-files/' directory to ${RESULT_PATH}/fio-job-files/..."
113+
gcloud storage cp fio-job-files/*.fio "${RESULT_PATH}/fio-job-files/"
113114
echo "FIO job files uploaded."
114115

115116
# Get the project number
@@ -119,11 +120,14 @@ PROJECT_NUMBER=$(gcloud projects describe "$PROJECT_ID" --format="value(projectN
119120
STS_ACCOUNT="project-${PROJECT_NUMBER}@storage-transfer-service.iam.gserviceaccount.com"
120121

121122
# Grant the service account 'roles/storage.admin' permissions on the newly created bucket
123+
# This allows the service account to manage the bucket and perform transfers
122124
gcloud storage buckets add-iam-policy-binding "gs://${GCS_BUCKET_WITH_FIO_TEST_DATA}" \
123125
--member="serviceAccount:${STS_ACCOUNT}" \
124126
--role="roles/storage.admin"
125127

126-
# Use storage transfer job to copy test data from a fixed GCS bucket.
128+
# Since file generation with fio is painfully slow, we will use storage transfer
129+
# job to transfer test data from a fixed GCS bucket to the newly created bucket.
130+
# Note: We only need to copy the read data.
127131
echo "Creating storage transfer job to copy read data to gs://${GCS_BUCKET_WITH_FIO_TEST_DATA}..."
128132
gcloud transfer jobs create \
129133
gs://gcsfuse-release-benchmark-fio-data \
@@ -158,16 +162,18 @@ SUCCESS_FILE_PATH="${RESULT_PATH}/success.txt"
158162
LOG_FILE_PATH="${RESULT_PATH}/benchmark_run.log"
159163
SLEEP_TIME=300 # 5 minutes
160164
sleep "$SLEEP_TIME"
165+
# Max 18 retries, amounting to ~1 hr 30 min of total wait time
161166
MAX_RETRIES=18
162167

163168
for ((i=1; i<=MAX_RETRIES; i++)); do
164169
if gcloud storage objects describe "${SUCCESS_FILE_PATH}" &> /dev/null; then
165170
echo "Benchmarks completed. success.txt found."
166-
echo "Results are available in BigQuery: gcs-fuse-test-ml.gke_test_tool_outputs.fio_outputs"
171+
echo "Results are available in BigQuery: ${BQ_TABLE}"
167172
echo "Benchmark log file: $LOG_FILE_PATH"
168173
exit 0
169174
fi
170175

176+
# Check for early failure indicators
171177
if gcloud storage objects describe "${RESULT_PATH}/details.txt" &> /dev/null || \
172178
gcloud storage objects describe "$LOG_FILE_PATH" &> /dev/null; then
173179
echo "Benchmark log or details.txt found, but success.txt is missing. Possible error in benchmark execution."
@@ -179,7 +185,9 @@ for ((i=1; i<=MAX_RETRIES; i++)); do
179185
sleep "$SLEEP_TIME"
180186
done
181187

182-
188+
# Failure case: success.txt was not found after retries
183189
echo "Timed out waiting for success.txt after $((MAX_RETRIES * SLEEP_TIME / 60)) minutes. Perhaps there is some error."
184190
echo "Benchmark log file (for troubleshooting): $LOG_FILE_PATH"
185191
exit 1
192+
193+
# The trap command will handle the cleanup on script exit.

perf-benchmarking-for-releases/starter-script.sh

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ else
3333
exit 1
3434
fi
3535

36+
# Install common dependencies before adding starterscriptuser
3637
if [[ "$OS_FAMILY" == "debian_ubuntu" ]]; then
3738
sudo apt-get update
38-
sudo apt-get install -y wget git fio libaio-dev gcc make mdadm build-essential python3-setuptools python3-crcmod python3-pip fuse jq bc procps gawk
39+
sudo apt-get install -y wget git fio libaio-dev gcc make mdadm build-essential python3-setuptools python3-crcmod python3-pip python3-venv fuse jq bc procps gawk
3940
elif [[ "$OS_FAMILY" == "rhel_centos" ]]; then
4041
sudo yum makecache
4142
sudo yum -y install git fio fuse libaio libaio-devel gcc make mdadm redhat-rpm-config python3-devel python3-setuptools python3-pip jq bc procps-ng wget gawk
@@ -60,6 +61,8 @@ sudo -u starterscriptuser OS_FAMILY="$OS_FAMILY" bash <<'EOF'
6061
set -x
6162
set -e
6263
64+
UPLOAD_FAILED=false
65+
6366
# Function to monitor GCSFuse CPU and memory usage
6467
monitor_gcsfuse_usage() {
6568
local log_file="$1"
@@ -122,6 +125,7 @@ monitor_gcsfuse_usage() {
122125
}
123126
124127
BENCHMARK_LOG_FILE="/tmp/benchmark_run.log"
128+
# Redirect stdout and stderr to BENCHMARK_LOG_FILE and also to original stdout/stderr
125129
exec > >(tee -a "$BENCHMARK_LOG_FILE") 2>&1
126130
127131
cleanup() {
@@ -144,6 +148,7 @@ cd ~/
144148
echo "Current directory: $(pwd)"
145149
echo "User: $(whoami)"
146150
151+
# Fetch metadata parameters
147152
GCSFUSE_VERSION=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/attributes/GCSFUSE_VERSION" -H "Metadata-Flavor: Google")
148153
GCS_BUCKET_WITH_FIO_TEST_DATA=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/attributes/GCS_BUCKET_WITH_FIO_TEST_DATA" -H "Metadata-Flavor: Google")
149154
RESULT_PATH=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/attributes/RESULT_PATH" -H "Metadata-Flavor: Google")
@@ -153,6 +158,7 @@ VM_NAME=$(hostname)
153158
UNIQUE_ID=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/attributes/UNIQUE_ID" -H "Metadata-Flavor: Google")
154159
GCSFUSE_MOUNT_OPTIONS_STR="implicit-dirs"
155160
161+
# Determine system architecture
156162
ARCHITECTURE=""
157163
if [[ "$OS_FAMILY" == "debian_ubuntu" ]]; then
158164
ARCHITECTURE=$(dpkg --print-architecture)
@@ -167,9 +173,13 @@ elif [[ "$OS_FAMILY" == "rhel_centos" ]]; then
167173
exit 1
168174
fi
169175
fi
176+
177+
# Install Go
170178
wget -nv --tries=3 --waitretry=5 -O go_tar.tar.gz "https://go.dev/dl/go1.24.0.linux-${ARCHITECTURE}.tar.gz"
171179
sudo tar -C /usr/local -xzf go_tar.tar.gz
172180
export PATH=$PATH:/usr/local/go/bin
181+
182+
# Clone and build gcsfuse
173183
git clone https://github.com/GoogleCloudPlatform/gcsfuse.git
174184
cd gcsfuse
175185
git checkout "$GCSFUSE_VERSION"
@@ -182,15 +192,19 @@ GCSFUSE_BIN="$CURR_DIR/gcsfuse/gcsfuse"
182192
MNT="$CURR_DIR/$MOUNT_POINT"
183193
SSD_MOUNT_DIR="/mnt/disks/local_ssd"
184194
FIO_JOB_DIR="/tmp/fio_jobs"
195+
196+
# Download all FIO job spec files
185197
mkdir -p "$FIO_JOB_DIR"
186-
gcloud storage cp "${RESULT_PATH}/fio_job_files/*.fio" "$FIO_JOB_DIR/"
198+
gcloud storage cp "${RESULT_PATH}/fio-job-files/*.fio" "$FIO_JOB_DIR/"
187199
200+
# Capture versions
188201
{
189202
echo "GCSFuse version: $GCSFUSE_VERSION"
190203
echo "Go version : $(go version)"
191204
echo "FIO version : $(fio --version)"
192205
} >> details.txt
193206
207+
# Create LSSD if enabled
194208
if [[ "$LSSD_ENABLED" == "true" ]]; then
195209
LSSD_DEVICES=()
196210
for i in {0..15}; do
@@ -213,7 +227,12 @@ if [[ "$LSSD_ENABLED" == "true" ]]; then
213227
fi
214228
215229
git clone --single-branch --branch fio-to-bigquery https://github.com/GoogleCloudPlatform/gcsfuse-tools.git
216-
python3 -m pip install --user -r gcsfuse-tools/perf_benchmarking_for_releases/requirements.txt
230+
cd gcsfuse-tools
231+
232+
python3 -m venv py_venv
233+
source py_venv/bin/activate
234+
python3 -m pip install -r perf-benchmarking-for-releases/requirements.txt
235+
217236
IFS=',' read -r -a GCSFUSE_FLAGS_ARRAY <<< "$GCSFUSE_MOUNT_OPTIONS_STR"
218237
GCSFUSE_FLAGS=()
219238
for flag in "${GCSFUSE_FLAGS_ARRAY[@]}"; do
@@ -255,6 +274,7 @@ for master_fio_file in "$FIO_JOB_DIR"/*.fio; do
255274
[[ "$LSSD_ENABLED" == "true" ]] && reformat_and_remount_lssd
256275
sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
257276
277+
# Mount GCS bucket using gcsfuse
258278
mkdir -p "$MNT"
259279
"$GCSFUSE_BIN" "${GCSFUSE_FLAGS[@]}" "$GCS_BUCKET_WITH_FIO_TEST_DATA" "$MNT"
260280
@@ -281,26 +301,40 @@ for master_fio_file in "$FIO_JOB_DIR"/*.fio; do
281301
read -r LOWEST_CPU HIGHEST_CPU <<< $(gawk 'BEGIN{min="inf";max="-inf"} {if($2<min)min=$2; if($2>max)max=$2} END{if(min=="inf")print "0.0 0.0"; else print min, max}' "$monitor_log")
282302
read -r LOWEST_MEM HIGHEST_MEM <<< $(gawk 'BEGIN{min="inf";max="-inf"} {if($3<min)min=$3; if($3>max)max=$3} END{if(min=="inf")print "0 0"; else print min, max}' "$monitor_log")
283303
284-
python3 gcsfuse-tools/perf_benchmarking_for_releases/upload_fio_output_to_bigquery.py \
304+
if python3 perf-benchmarking-for-releases/upload_fio_output_to_bigquery.py \
285305
--result-file "$RESULT_FILE" \
286306
--fio-job-file "$single_fio_file" \
287307
--master-fio-file "$master_fio_file" \
288308
--lowest-cpu "$LOWEST_CPU" \
289309
--highest-cpu "$HIGHEST_CPU" \
290310
--lowest-mem "$LOWEST_MEM" \
291311
--highest-mem "$HIGHEST_MEM" \
292-
--gcsfuse-mount-options "$GCSFUSE_MOUNT_OPTIONS_STR"
293-
294-
rm -f "$RESULT_FILE" "$monitor_log"
312+
--gcsfuse-mount-options "$GCSFUSE_MOUNT_OPTIONS_STR"; then
313+
echo "Successfully uploaded results to BigQuery for job: $job_file_basename"
314+
rm -f "$RESULT_FILE" "$monitor_log"
315+
else
316+
echo "Warning: Failed to upload results to BigQuery for job: $job_file_basename. Uploading monitor log to GCS for debugging."
317+
gcloud storage cp "$monitor_log" "${RESULTS_SUBDIR_PATH}/" || echo "Warning: Failed to upload monitor log for ${job_file_basename}"
318+
UPLOAD_FAILED=true
319+
fi
295320
done
296321
297322
rm -rf "$SPLIT_DIR"
298323
done
299324
300-
touch success.txt
301-
gcloud storage cp success.txt "$RESULT_PATH"
302-
rm success.txt
325+
cd ..
326+
327+
if [[ "$UPLOAD_FAILED" == "false" ]]; then
328+
# All tests ran successfully; create a success.txt file in GCS
329+
touch success.txt
330+
gcloud storage cp success.txt "$RESULT_PATH"
331+
rm success.txt
332+
else
333+
echo "One or more BigQuery uploads failed. Not creating success.txt to indicate benchmark failure."
334+
fi
303335
304336
EOF
305337

306338
echo "Starter script finished execution on VM."
339+
340+
# The trap command will handle the cleanup on script exit.

perf-benchmarking-for-releases/upload_fio_output_to_bigquery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,22 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
116
import json
217
import datetime
318
import argparse
19+
import sys
420
from google.cloud import bigquery
521
import requests
622

@@ -214,5 +230,6 @@ def fetch_metadata(attribute):
214230
errors = client.insert_rows_json(full_table_id, [row_to_insert])
215231
if errors:
216232
print("Errors inserting rows:", errors)
233+
sys.exit(1)
217234
else:
218235
print(f"Inserted 1 row for job '{jobname}' into {full_table_id}")

0 commit comments

Comments
 (0)