Skip to content

Commit 4f930e1

Browse files
authored
benchmark_regression v1 (#7573)
# Overview
- Add `device_info` and labels to the benchmark regression report schema.
- Auto-detect device info, and add labels to support customized search.
- Switch the benchmark table from the `fortesting` database to the `benchmark` database.
- Add unit tests for the lambda.

# Migrate to benchmark db table
Once this is deployed, we will route UI traffic to the benchmark table. We can either backfill the data from `fortesting` or start over, since this is only regression report data and nothing that needs to be preserved.
1 parent 4e2a4f8 commit 4f930e1

File tree

8 files changed

+736
-26
lines changed

8 files changed

+736
-26
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,5 +47,5 @@ jobs:
4747
pip install -r aws/lambda/tests/test_requirements.txt
4848
echo ::endgroup::
4949
50-
# Test aws lambda
51-
pytest -v aws/lambda/tests
50+
# Test aws lambda, add relative path to PYTHONPATH for importing
51+
PYTHONPATH=aws/lambda:aws/lambda/benchmark_regression_summary_report pytest -v aws/lambda/tests

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@
164164
"latency": RegressionPolicy(
165165
name="latency",
166166
condition="less_equal",
167-
threshold=1.20,
167+
threshold=1.35,
168168
baseline_aggregation="median",
169169
),
170170
},

aws/lambda/benchmark_regression_summary_report/common/regression_utils.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ class BenchmarkRegressionReport(TypedDict):
7474
results: List[PerGroupResult]
7575
baseline_meta_data: TimeSeriesMetaInfo
7676
new_meta_data: TimeSeriesMetaInfo
77+
device_info: List[str]
7778

7879

7980
def get_regression_status(regression_summary: BenchmarkRegressionSummary) -> str:
@@ -103,6 +104,8 @@ def __init__(
103104
self.lastest_ts_info = self._get_meta_info(target_ts.time_series)
104105
self.target_ts = self._to_data_map(target_ts)
105106
self.baseline_ts = self._to_data_map(baseline_ts)
107+
# collect device info from target_ts
108+
self.device_info = self._to_device_info(target_ts)
106109

107110
def generate(self) -> BenchmarkRegressionReport:
108111
if not self.baseline_ts or not self.target_ts:
@@ -148,7 +151,6 @@ def detect_regressions_with_policies(
148151

149152
base_item = baseline_map.get(key)
150153
if not base_item:
151-
logger.warning("Skip. No baseline item found for %s", key)
152154
results.append(
153155
PerGroupResult(
154156
group_info=gi,
@@ -217,6 +219,7 @@ def detect_regressions_with_policies(
217219
results=results,
218220
baseline_meta_data=self.baseline_ts_info,
219221
new_meta_data=self.lastest_ts_info,
222+
device_info=self.device_info,
220223
)
221224

222225
def summarize_label_counts(
@@ -243,6 +246,22 @@ def _label_str(self, x) -> str:
243246
return (v if isinstance(v, str) else str(v)).lower()
244247
return str(x).lower()
245248

249+
def _to_device_info(self, data: "BenchmarkTimeSeriesApiData") -> List[str]:
    """Collect the distinct device labels found in ``data.time_series``.

    For every time-series group the label is built from its ``group_info``:

    * ``"{device}_{arch}"`` when both ``device`` and ``arch`` are set,
    * ``device`` alone when only ``device`` is set.

    Groups without a ``device`` value are skipped, even if they carry an
    ``arch``.

    Args:
        data: object exposing ``time_series``, an iterable of groups whose
            ``group_info`` is a mapping.

    Returns:
        The unique labels in sorted order. Sorting keeps the result
        deterministic; iterating a ``set`` of strings directly can yield a
        different order on every run because of hash randomization.
    """
    labels = set()
    for ts_group in data.time_series:
        device = ts_group.group_info.get("device", "")
        if not device:
            # An arch without a device is not enough to identify hardware.
            continue
        arch = ts_group.group_info.get("arch", "")
        labels.add(f"{device}_{arch}" if arch else device)
    return sorted(labels)
264+
246265
def _to_data_map(
247266
self, data: "BenchmarkTimeSeriesApiData", field: str = "value"
248267
) -> Dict[tuple, BenchmarkRegressionPointGroup]:

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ def __init__(
5656
# extract latest meta data from report
5757
self.baseline = self.raw_report["baseline_meta_data"]
5858
self.target = self.raw_report["new_meta_data"]
59+
self.device_info = self.raw_report["device_info"]
5960
self.target_latest_commit = self.target["end"]["commit"]
6061
self.target_latest_ts_str = self.target["end"]["timestamp"]
6162
self.status = get_regression_status(self.raw_report["summary"])
62-
6363
self.report_data = self._to_report_data(
6464
config_id=config.id,
6565
regression_report=self.raw_report,
@@ -74,30 +74,34 @@ def run(
7474
main method used to insert the report to db and create github comment in targeted issue
7575
"""
7676
try:
77-
applied_insertion = self.insert_to_db(cc)
77+
applied_insertion = self.insert_to_db(cc, self.db_table_name)
7878
except Exception as e:
79-
logger.error(f"failed to insert report to db, error: {e}")
79+
logger.warning(f"failed to insert report to db, error: {str(e)}")
8080
raise
8181
if not applied_insertion:
82-
logger.info("[%s] skip notification, already exists in db", self.config_id)
82+
logger.info(
83+
"[%s] skip notification, already exists in db or this is dry-run",
84+
self.config_id,
85+
)
8386
return
8487
self.notify_github_comment(github_token)
88+
logger.info("[%s] Done. ReportManager run is completed.", self.config_id)
8589

86-
def notify_github_comment(self, github_token: str):
90+
def notify_github_comment(self, github_token: str) -> str:
8791
if self.status != "regression":
8892
logger.info(
8993
"[%s] no regression found, skip notification",
9094
self.config_id,
9195
)
92-
return
96+
return "skip_no_regression"
9397

9498
github_notification = self.config.policy.get_github_notification_config()
9599
if not github_notification:
96100
logger.info(
97101
"[%s] no github notification config found, skip notification",
98102
self.config_id,
99103
)
100-
return
104+
return "skip_no_notification_config"
101105
logger.info("[%s] prepareing gitub comment content", self.config_id)
102106
content = self._to_markdown()
103107
if self.is_dry_run:
@@ -109,10 +113,14 @@ def notify_github_comment(self, github_token: str):
109113
logger.info("[dry run] printing comment content")
110114
print(json.dumps(content, indent=2, default=str))
111115
logger.info("[dry run] Done! Finish printing comment content")
112-
return
113-
logger.info("[%s] create comment to github issue", self.config_id)
114-
github_notification.create_github_comment(content, github_token)
115-
logger.info("[%s] done. comment is sent to github", self.config_id)
116+
return "skip_dry_run"
117+
try:
118+
github_notification.create_github_comment(content, github_token)
119+
logger.info("[%s] done. comment is sent to github", self.config_id)
120+
return "success"
121+
except Exception as e:
122+
logger.warning(f"failed to insert report to db, error: {str(e)}")
123+
return "failure"
116124

117125
def _to_markdown(self) -> str:
118126
regression_items = [
@@ -134,8 +142,7 @@ def _to_markdown(self) -> str:
134142
)
135143

136144
def insert_to_db(
137-
self,
138-
cc: clickhouse_connect.driver.client.Client,
145+
self, cc: clickhouse_connect.driver.client.Client, table: str
139146
) -> bool:
140147
logger.info(
141148
"[%s]prepare data for db insertion report (%s)...", self.config_id, self.id
@@ -176,6 +183,7 @@ def insert_to_db(
176183
"total_count": regression_summary["total_count"],
177184
"repo": self.repo,
178185
"report_json": report_json,
186+
"device_info": self.device_info,
179187
}
180188

181189
if self.is_dry_run:
@@ -196,7 +204,7 @@ def insert_to_db(
196204
try:
197205
if self._row_exists(
198206
cc,
199-
self.db_table_name,
207+
table,
200208
params["report_id"],
201209
params["type"],
202210
params["repo"],
@@ -208,7 +216,7 @@ def insert_to_db(
208216
self.id,
209217
)
210218
return False
211-
self._db_insert(cc, self.db_table_name, params)
219+
self._db_insert(cc, table, params)
212220
logger.info(
213221
"[%s] Done. inserted benchmark regression report(%s)",
214222
self.config_id,
@@ -219,7 +227,7 @@ def insert_to_db(
219227
logger.exception(
220228
"[%s] failed to insert report to target table %s",
221229
self.config_id,
222-
self.db_table_name,
230+
table,
223231
)
224232
raise
225233

@@ -256,7 +264,8 @@ def _db_insert(
256264
suspected_regression_count,
257265
total_count,
258266
repo,
259-
report
267+
report,
268+
device_info
260269
)
261270
VALUES
262271
(
@@ -271,9 +280,12 @@ def _db_insert(
271280
%(suspected_regression_count)s,
272281
%(total_count)s,
273282
%(repo)s,
274-
%(report_json)s
283+
%(report_json)s,
284+
%(device_info)s
275285
)
276286
"""
287+
# debugging only - uncomment to see the sql
288+
# logger.info("[%s]inserting report to db, sql: %s", self.config_id, sql)
277289
cc.command(sql, parameters=params)
278290

279291
def _row_exists(

aws/lambda/benchmark_regression_summary_report/lambda_function.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919

2020
# TODO(elainewy): change this to benchmark.benchmark_regression_report once the table is created
21-
BENCHMARK_REGRESSION_REPORT_TABLE = "fortesting.benchmark_regression_report"
21+
BENCHMARK_REGRESSION_REPORT_TABLE = "benchmark.benchmark_regression_report"
2222
BENCHMARK_REGRESSION_TRACKING_CONFIG_IDS = ["compiler_regression"]
2323

2424
logging.basicConfig(

0 commit comments

Comments
 (0)