Skip to content

Commit 570e476

Browse files
committed
Add machine type and gcsfuse version in bigquery schema
1 parent 3114999 commit 570e476

File tree

3 files changed

+46
-13
lines changed

3 files changed

+46
-13
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
google-cloud-bigquery
2+
requests

perf-benchmarking-for-releases/run-benchmarks.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ gcloud compute instances create "${VM_NAME}" \
166166
--network-interface=network-tier=PREMIUM,nic-type=GVNIC \
167167
--scopes=https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/devstorage.read_write \
168168
--network-performance-configs=total-egress-bandwidth-tier=TIER_1 \
169-
--metadata GCSFUSE_VERSION="${GCSFUSE_VERSION}",GCS_BUCKET_WITH_FIO_TEST_DATA="${GCS_BUCKET_WITH_FIO_TEST_DATA}",RESULTS_BUCKET_NAME="${RESULTS_BUCKET_NAME}",LSSD_ENABLED="${LSSD_ENABLED}" \
169+
--metadata GCSFUSE_VERSION="${GCSFUSE_VERSION}",GCS_BUCKET_WITH_FIO_TEST_DATA="${GCS_BUCKET_WITH_FIO_TEST_DATA}",RESULTS_BUCKET_NAME="${RESULTS_BUCKET_NAME}",LSSD_ENABLED="${LSSD_ENABLED}",MACHINE_TYPE="${MACHINE_TYPE}" \
170170
--metadata-from-file=startup-script=starter-script.sh \
171171
${VM_LOCAL_SSD_ARGS}
172172
echo "VM created. Benchmarks will run on the VM."

perf-benchmarking-for-releases/upload-fio-output-to-bigquery.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,22 @@
1212

1313
args = parser.parse_args()
1414

15+
import requests
16+
17+
def fetch_metadata(attribute):
    """Fetch a custom metadata attribute from the GCE metadata server.

    Args:
        attribute: Name of the instance attribute to read (e.g. "MACHINE_TYPE").

    Returns:
        The attribute value as text, or the string "unknown" if the request
        fails for any network/HTTP reason.
    """
    url = f"http://metadata.google.internal/computeMetadata/v1/instance/attributes/{attribute}"
    # The metadata server rejects requests that lack this header.
    headers = {"Metadata-Flavor": "Google"}
    try:
        response = requests.get(url, headers=headers, timeout=5)
        response.raise_for_status()
        return response.text
    # Catch only requests' failures (connection errors, timeouts, and the
    # HTTPError raised by raise_for_status) — a bare `except Exception` would
    # also hide genuine programming errors in this function.
    except requests.RequestException as e:
        # Best-effort: the upload should proceed even without this metadata.
        print(f"Failed to fetch metadata attribute '{attribute}': {e}")
        return "unknown"
27+
28+
machine_type = fetch_metadata("MACHINE_TYPE")
29+
gcsfuse_version = fetch_metadata("GCSFUSE_VERSION")
30+
1531
# Load the results file
1632
with open(args.result_file) as f:
1733
try:
@@ -36,14 +52,16 @@
3652
# Create table if it doesn't exist
3753
schema = [
3854
bigquery.SchemaField("job_name", "STRING"),
55+
bigquery.SchemaField("gcsfuse_version", "STRING"),
56+
bigquery.SchemaField("machine_type", "STRING"),
3957
bigquery.SchemaField("start_time", "TIMESTAMP"),
4058
bigquery.SchemaField("file_size", "STRING"),
4159
bigquery.SchemaField("block_size", "STRING"),
4260
bigquery.SchemaField("nrfiles", "INTEGER"),
4361
bigquery.SchemaField("read_bandwidth_MiBps", "FLOAT"),
4462
bigquery.SchemaField("write_bandwidth_MiBps", "FLOAT"),
4563
bigquery.SchemaField("IOPS", "FLOAT"),
46-
bigquery.SchemaField("duration_seconds", "FLOAT"),
64+
bigquery.SchemaField("avg_latency_ms", "FLOAT"),
4765
]
4866

4967
try:
@@ -59,31 +77,45 @@
5977
rows = []
6078
for job in data.get("jobs", []):
6179
jobname = job.get("jobname")
62-
# Correctly access job options using .get() for nested keys
6380
job_options = job.get("job options", {})
6481

65-
# Use get with a default value for each option and handle string conversion
66-
file_size = job_options.get("filesize", data.get("global options",{}).get("filesize", "unknown"))
67-
block_size = job_options.get("bs", data.get("global options",{}).get("bs", "unknown"))
68-
69-
# Convert nrfiles to int, handle missing values and potential string values
70-
nrfiles_str = job_options.get("nrfiles", data.get("global options",{}).get("nrfiles"))
82+
file_size = job_options.get("filesize", data.get("global options", {}).get("filesize", "unknown"))
83+
block_size = job_options.get("bs", data.get("global options", {}).get("bs", "unknown"))
84+
85+
nrfiles_str = job_options.get("nrfiles", data.get("global options", {}).get("nrfiles"))
7186
nrfiles = int(nrfiles_str) if nrfiles_str and isinstance(nrfiles_str, str) and nrfiles_str.isdigit() else 0
7287

73-
read_bw = job.get("read", {}).get("bw_bytes", 0) / (1024 * 1024)
74-
write_bw = job.get("write", {}).get("bw_bytes", 0) / (1024 * 1024)
75-
iops = job.get("read", {}).get("iops", 0.0) + job.get("write", {}).get("iops", 0.0)
88+
read = job.get("read", {})
89+
write = job.get("write", {})
90+
91+
read_bw = read.get("bw_bytes", 0) / (1024 * 1024)
92+
write_bw = write.get("bw_bytes", 0) / (1024 * 1024)
93+
iops = read.get("iops", 0.0) + write.get("iops", 0.0)
94+
95+
read_lat_ns = read.get("lat_ns", {}).get("mean")
96+
write_lat_ns = write.get("lat_ns", {}).get("mean")
97+
98+
if read_lat_ns is not None and write_lat_ns is not None:
99+
avg_latency_ms = ((read_lat_ns + write_lat_ns) / 2) / 1_000_000
100+
elif read_lat_ns is not None:
101+
avg_latency_ms = read_lat_ns / 1_000_000
102+
elif write_lat_ns is not None:
103+
avg_latency_ms = write_lat_ns / 1_000_000
104+
else:
105+
avg_latency_ms = 0.0
76106

77107
rows.append({
78108
"job_name": jobname,
109+
"gcsfuse_version": gcsfuse_version,
110+
"machine_type": machine_type,
79111
"start_time": start_time,
80112
"file_size": file_size,
81113
"block_size": block_size,
82114
"nrfiles": nrfiles,
83115
"read_bandwidth_MiBps": read_bw,
84116
"write_bandwidth_MiBps": write_bw,
85117
"IOPS": iops,
86-
"duration_seconds": job.get("job_runtime", 0) / 1000,
118+
"avg_latency_ms": avg_latency_ms,
87119
})
88120

89121
# Insert rows

0 commit comments

Comments
 (0)