Fixed comparison ERROR on coordinator, and extended hash, bitmap, and list benchmarks (#270)

* Enabled running forks source built benchmarks

* Fixed server_name Null check

* Enabled passing baseline/comparison hash and github_repo to ensure proper data filtering on compare. Removed refs/heads/ usage from builder

* Skipping CLI builder test on CI

* Added --baseline-target-branch and --comparison-target-branch to the compare tool

* Added new GEOPOS and GEOSEARCH WITHCOORD benchmarks

* Included the connection setup benchmark using HELLO

* Bumping version from 0.1.218 to 0.1.219

* Added APPEND/INCRBY/INCRBYFLOAT/SETRANGE benchmarks

* Included APPEND, INCRBY, INCRBYFLOAT, SETRANGE pipeline 10 benchmarks

* Added SETEX benchmark

* Added ZUNION, ZUNIONSTORE, and extra ZADD benchmarks. Included SADD benchmarks with intset underlying encoding

* Included extra pipeline 10 and lrange with longs benchmarks

* Included HGETALL 50 fields use-case

* Added HGETALL 50 fields use-case with 10 Bytes and 100 Bytes values

* Fixed comparison on CI

* Added BITCOUNT benchmarks
fcostaoliveira authored Sep 7, 2024
1 parent e31755f commit d70c269
Showing 15 changed files with 401 additions and 86 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "redis-benchmarks-specification"
version = "0.1.226"
version = "0.1.234"
description = "The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute."
authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
readme = "Readme.md"
21 changes: 11 additions & 10 deletions redis_benchmarks_specification/__builder__/builder.py
@@ -370,16 +370,17 @@ def builder_process_stream(
deps_list.append("fpconv")
redis_temporary_dir = temporary_dir + "/" + redis_dir + "/"
logging.info("Using redis temporary dir {}".format(redis_temporary_dir))
build_command = "bash -c 'make Makefile.dep && cd ./deps && CXX={} CC={} make {} {} -j && cd .. && CXX={} CC={} make {} {} -j'".format(
cpp_compiler,
compiler,
" ".join(deps_list),
build_vars_str,
cpp_compiler,
compiler,
"redis-server",
build_vars_str,
)
# build_command = "bash -c 'make Makefile.dep && cd ./deps && CXX={} CC={} make {} {} -j && cd .. && CXX={} CC={} make {} {} -j'".format(
# cpp_compiler,
# compiler,
# " ".join(deps_list),
# build_vars_str,
# cpp_compiler,
# compiler,
# "redis-server",
# build_vars_str,
# )
build_command = "sh -c 'make -j'"
if b"build_command" in testDetails:
build_command = testDetails[b"build_command"].decode()
server_name = "redis"
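The builder now defaults to a plain sh -c 'make -j' and only swaps in a custom command when the incoming build event carries a build_command field, which it decodes from testDetails. A minimal sketch of how a producer could request a custom build via redis-py — the stream key shown here is a hypothetical placeholder, not the spec's actual schema:

import redis

r = redis.Redis()  # assumes a local Redis instance

# Hypothetical stream key, for illustration only.
BUILD_STREAM = "oss:gh/redis/redis/builds"

# The builder decodes a "build_command" field, if present, and runs it
# instead of the default `sh -c 'make -j'`.
r.xadd(
    BUILD_STREAM,
    {
        "git_hash": "d70c269",
        "build_command": "bash -c 'make MALLOC=libc -j'",
    },
)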
87 changes: 55 additions & 32 deletions redis_benchmarks_specification/__compare__/compare.py
@@ -284,7 +284,7 @@ def compare_command_logic(args, project_name, project_version):
(
detected_regressions,
table_output,
total_improvements,
improvements_list,
regressions_list,
total_stable,
total_unstable,
@@ -332,6 +332,7 @@ def compare_command_logic(args, project_name, project_version):
args.improvement_str,
)
total_regressions = len(regressions_list)
total_improvements = len(improvements_list)
prepare_regression_comment(
auto_approve,
baseline_branch,
@@ -359,6 +360,7 @@ def compare_command_logic(args, project_name, project_version):
verbose,
args.regressions_percent_lower_limit,
regressions_list,
improvements_list,
)
return (
detected_regressions,
@@ -398,6 +400,7 @@ def prepare_regression_comment(
verbose,
regressions_percent_lower_limit,
regressions_list=[],
improvements_list=[],
):
if total_comparison_points > 0:
comment_body = "### Automated performance analysis summary\n\n"
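The new regressions_list=[] and improvements_list=[] parameters are mutable default arguments, which Python evaluates once at function definition time. They appear only to be read here, so the shared-state pitfall is not triggered, but the conventional None-default spelling avoids it entirely. A self-contained illustration of the pitfall and the usual fix (a sketch, not the commit's code):

# One shared list across every call vs. a fresh list per call.
def risky(acc=[]):
    acc.append(1)
    return acc

def safe(acc=None):
    if acc is None:
        acc = []
    acc.append(1)
    return acc

assert risky() == [1]
assert risky() == [1, 1]  # state leaked from the first call
assert safe() == [1]
assert safe() == [1]      # no leakage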
@@ -424,18 +427,27 @@
comparison_summary += "- Detected a total of {} improvements above the improvement water line.\n".format(
total_improvements
)
if len(improvements_list) > 0:
improvement_values = [l[1] for l in improvements_list]
improvement_df = pd.DataFrame(improvement_values)
median_improvement = round(float(improvement_df.median().iloc[0]), 1)
max_improvement = round(float(improvement_df.max().iloc[0]), 1)
min_improvement = round(float(improvement_df.min().iloc[0]), 1)

comparison_summary += f" - Median/Common-Case improvement was {median_improvement}% and ranged from [{min_improvement}%,{max_improvement}%].\n"

if total_regressions > 0:
comparison_summary += "- Detected a total of {} regressions bellow the regression water line {}.\n".format(
total_regressions, regressions_percent_lower_limit
)
if len(regressions_list) > 0:
regression_values = [l[1] for l in regressions_list]
regression_df = pd.DataFrame(regression_values)
median_regression = round(float(regression_df.median().iloc[0]), 2)
max_regression = round(float(regression_df.max().iloc[0]), 2)
min_regression = round(float(regression_df.min().iloc[0]), 2)
median_regression = round(float(regression_df.median().iloc[0]), 1)
max_regression = round(float(regression_df.max().iloc[0]), 1)
min_regression = round(float(regression_df.min().iloc[0]), 1)

comparison_summary += f" - Median/Common-Case regression was {median_regression}%% and ranged from [{min_regression},{max_regression}] %%.\n"
comparison_summary += f" - Median/Common-Case regression was {median_regression}% and ranged from [{min_regression}%,{max_regression}%].\n"

comment_body += comparison_summary
comment_body += "\n"
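improvements_list now carries [test_name, percentage_change] pairs instead of bare names, which is what lets the summary report the spread of the improvements. Note also the fix just above from %% to %: f-strings do not use printf-style formatting, so %% rendered as a literal double percent sign. A self-contained sketch of the same median/range computation on made-up values:

import pandas as pd

# Hypothetical [test_name, pct_change] pairs, as built in from_rts_to_regression_table.
improvements_list = [["test-a", 4.2], ["test-b", 9.7], ["test-c", 6.1]]

values = [item[1] for item in improvements_list]
df = pd.DataFrame(values)
median_v = round(float(df.median().iloc[0]), 1)
max_v = round(float(df.max().iloc[0]), 1)
min_v = round(float(df.min().iloc[0]), 1)

# Prints: Median/Common-Case improvement was 6.1% and ranged from [4.2%,9.7%].
print(f"Median/Common-Case improvement was {median_v}% and ranged from [{min_v}%,{max_v}%].")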
@@ -542,7 +554,7 @@ def compute_regression_table(
tf_triggering_env,
metric_name,
comparison_branch,
baseline_branch="master",
baseline_branch="unstable",
baseline_tag=None,
comparison_tag=None,
baseline_deployment_name="oss-standalone",
@@ -704,8 +716,8 @@
table_name="",
headers=[
"Test Case",
"Baseline {} (median obs. +- std.dev)".format(baseline_str),
"Comparison {} (median obs. +- std.dev)".format(comparison_str),
f"Baseline {baseline_github_org}/{baseline_github_repo} {baseline_str} (median obs. +- std.dev)",
f"Comparison {comparison_github_org}/{comparison_github_repo} {comparison_str} (median obs. +- std.dev)",
"% change ({})".format(metric_mode),
"Note",
],
@@ -727,8 +739,8 @@
table_name="",
headers=[
"Test Case",
"Baseline {} (median obs. +- std.dev)".format(baseline_str),
"Comparison {} (median obs. +- std.dev)".format(comparison_str),
f"Baseline {baseline_github_org}/{baseline_github_repo} {baseline_str} (median obs. +- std.dev)",
f"Comparison {comparison_github_org}/{comparison_github_repo} {comparison_str} (median obs. +- std.dev)",
"% change ({})".format(metric_mode),
"Note",
],
@@ -737,7 +749,7 @@
writer_regressions.dump(mystdout, False)
table_output += mystdout.getvalue()
table_output += "\n\n"
test_names_str = "|".join(improvements_list)
test_names_str = "|".join([l[0] for l in improvements_list])
table_output += f"Improvements test regexp names: {test_names_str}\n\n"
mystdout.close()
sys.stdout = old_stdout
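The join fix above follows from the same structural change: the list now holds pairs, so the name must be projected out first, otherwise join raises on non-string items. A tiny sketch with hypothetical test names:

improvements_list = [["memtier_benchmark-1key-bitcount", 5.3],
                     ["memtier_benchmark-100Kkeys-hash-hgetall", 2.8]]

# "|".join(improvements_list) would raise TypeError (lists, not strings);
# join the names only to build a usable test-name regexp.
test_names_str = "|".join(item[0] for item in improvements_list)
# -> "memtier_benchmark-1key-bitcount|memtier_benchmark-100Kkeys-hash-hgetall"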
@@ -748,8 +760,8 @@
table_name="",
headers=[
"Test Case",
"Baseline {} (median obs. +- std.dev)".format(baseline_str),
"Comparison {} (median obs. +- std.dev)".format(comparison_str),
f"Baseline {baseline_github_org}/{baseline_github_repo} {baseline_str} (median obs. +- std.dev)",
f"Comparison {comparison_github_org}/{comparison_github_repo} {comparison_str} (median obs. +- std.dev)",
"% change ({})".format(metric_mode),
"Note",
],
@@ -766,7 +778,7 @@
return (
detected_regressions,
table_output,
total_improvements,
improvements_list,
regressions_list,
total_stable,
total_unstable,
@@ -1098,7 +1110,12 @@ def from_rts_to_regression_table(
logging.error("Detected a ZeroDivisionError. {}".format(e.__str__()))
pass
unstable = False
if baseline_v != "N/A" and comparison_v != "N/A":
if (
baseline_v != "N/A"
and comparison_pct_change != "N/A"
and comparison_v != "N/A"
and baseline_pct_change != "N/A"
):
if comparison_pct_change > 10.0 or baseline_pct_change > 10.0:
note = "UNSTABLE (very high variance)"
unstable = True
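This widened guard addresses the kind of failure named in the commit title: the pct_change fields can hold the string "N/A" when no variance could be computed, and Python 3 refuses to order a str against a float, so the old two-field check could still reach the > 10.0 comparisons with a string. A minimal reproduction using only the standard library:

baseline_pct_change = "N/A"  # placeholder used when variance could not be computed

try:
    if baseline_pct_change > 10.0:  # str vs. float ordering
        print("unstable")
except TypeError as e:
    # '>' not supported between instances of 'str' and 'float'
    print(f"comparison ERROR: {e}")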
@@ -1119,6 +1136,10 @@
percentage_change = (
float(baseline_v) / float(comparison_v) - 1
) * 100.0
else:
logging.warning(
f"Missing data for test {test_name}. baseline_v={baseline_v} (pct_change={baseline_pct_change}), comparison_v={comparison_v} (pct_change={comparison_pct_change})"
)
if baseline_v != "N/A" or comparison_v != "N/A":
detected_regression = False
detected_improvement = False
@@ -1170,7 +1191,7 @@
table_regressions.append(line)

if detected_improvement:
improvements_list.append(test_name)
improvements_list.append([test_name, percentage_change])
table_improvements.append(line)

if unstable:
@@ -1325,23 +1346,25 @@ def get_v_pct_change_and_largest_var(
_, comparison_v = comparison_datapoints[0]
for tuple in comparison_datapoints:
if last_n < 0 or (last_n > 0 and len(comparison_values) < last_n):
comparison_values.append(tuple[1])
comparison_df = pd.DataFrame(comparison_values)
comparison_median = float(comparison_df.median().iloc[0])
comparison_v = comparison_median
comparison_std = float(comparison_df.std().iloc[0])
if verbose:
logging.info(
"comparison_datapoints: {} value: {}; std-dev: {}; median: {}".format(
comparison_datapoints,
comparison_v,
comparison_std,
comparison_median,
if tuple[1] > 0.0:
comparison_values.append(tuple[1])
if len(comparison_values) > 0:
comparison_df = pd.DataFrame(comparison_values)
comparison_median = float(comparison_df.median().iloc[0])
comparison_v = comparison_median
comparison_std = float(comparison_df.std().iloc[0])
if verbose:
logging.info(
"comparison_datapoints: {} value: {}; std-dev: {}; median: {}".format(
comparison_datapoints,
comparison_v,
comparison_std,
comparison_median,
)
)
)
comparison_pct_change = (comparison_std / comparison_median) * 100.0
if comparison_pct_change > largest_variance:
largest_variance = comparison_pct_change
comparison_pct_change = (comparison_std / comparison_median) * 100.0
if comparison_pct_change > largest_variance:
largest_variance = comparison_pct_change
return comparison_pct_change, comparison_v, largest_variance
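Skipping non-positive samples and guarding on an empty list protects the statistics: a zero median would make (comparison_std / comparison_median) * 100.0 raise ZeroDivisionError (the same error class logged earlier in this file), and zero-valued datapoints usually mean a missing measurement rather than a true zero. A condensed sketch of the filtering, with made-up datapoints:

import pandas as pd

# Hypothetical (timestamp, value) datapoints; zeros stand for missing samples.
comparison_datapoints = [(1, 0.0), (2, 105.0), (3, 98.0), (4, 101.0)]

comparison_values = [v for _, v in comparison_datapoints if v > 0.0]
if comparison_values:
    df = pd.DataFrame(comparison_values)
    median = float(df.median().iloc[0])
    std = float(df.std().iloc[0])
    pct_change = (std / median) * 100.0  # safe: median > 0 by construction
    print(f"median={median}; std-dev={std:.2f}; variance={pct_change:.2f}%")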


@@ -1374,7 +1374,7 @@ def process_self_contained_coordinator_stream(
(
detected_regressions,
table_output,
total_improvements,
improvement_list,
regressions_list,
total_stable,
total_unstable,
@@ -1387,8 +1387,8 @@
metric_name,
comparison_branch,
baseline_branch,
baseline_tag,
comparison_tag,
None, # we only compare by branch on CI automation
None, # we only compare by branch on CI automation
baseline_deployment_name,
comparison_deployment_name,
print_improvements_only,
@@ -1410,37 +1410,44 @@
running_platform,
)
total_regressions = len(regressions_list)
total_improvements = len(improvement_list)
auto_approve = True
grafana_link_base = "https://benchmarksredisio.grafana.net/d/1fWbtb7nz/experimental-oss-spec-benchmarks"

prepare_regression_comment(
auto_approve,
baseline_branch,
baseline_tag,
comparison_branch,
comparison_tag,
contains_regression_comment,
github_pr,
grafana_link_base,
is_actionable_pr,
old_regression_comment_body,
pr_link,
regression_comment,
datasink_conn,
running_platform,
table_output,
tf_github_org,
tf_github_repo,
tf_triggering_env,
total_comparison_points,
total_improvements,
total_regressions,
total_stable,
total_unstable,
verbose,
regressions_percent_lower_limit,
regressions_list,
)
try:
prepare_regression_comment(
auto_approve,
baseline_branch,
baseline_tag,
comparison_branch,
comparison_tag,
contains_regression_comment,
github_pr,
grafana_link_base,
is_actionable_pr,
old_regression_comment_body,
pr_link,
regression_comment,
datasink_conn,
running_platform,
table_output,
tf_github_org,
tf_github_repo,
tf_triggering_env,
total_comparison_points,
total_improvements,
total_regressions,
total_stable,
total_unstable,
verbose,
regressions_percent_lower_limit,
regressions_list,
)
except Exception as e:
logging.error(
"Failed to produce regression comment but continuing... Error: {}".format(
e.__str__()
)
)
logging.info(
f"Added test named {test_name} to the completed test list in key {stream_test_list_completed}"
)
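Wrapping prepare_regression_comment in try/except makes commenting best-effort: a GitHub or datasink hiccup no longer aborts the stream consumer mid-run. The shape of the pattern, reduced to a runnable sketch (the failing helper is a stand-in, not the project's code):

import logging

def post_summary_comment() -> None:
    raise RuntimeError("GitHub API unavailable")  # stand-in failure

try:
    post_summary_comment()
except Exception as e:
    # Log and keep consuming the test stream.
    logging.error("Failed to produce regression comment but continuing... Error: {}".format(e))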
@@ -0,0 +1,34 @@
version: 0.4
name: memtier_benchmark-100Kkeys-hash-hgetall-50-fields-100B-values
description: Runs memtier_benchmark, for a keyspace length of 100K keys pre-loading HASHes of 50 fields in which each field value has a data size of 100 Bytes. After pre-loading the data it issues the HGETALL command.
dbconfig:
configuration-parameters:
save: '""'
check:
keyspacelen: 100000
preload_tool:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: '"--data-size" "100" --command "HSET __key__ field:1 __data__ field:2 __data__ field:3 __data__ field:4 __data__ field:5 __data__ field:6 __data__ field:7 __data__ field:8 __data__ field:9 __data__ field:10 __data__ field:11 __data__ field:12 __data__ field:13 __data__ field:14 __data__ field:15 __data__ field:16 __data__ field:17 __data__ field:18 __data__ field:19 __data__ field:20 __data__ field:21 __data__ field:22 __data__ field:23 __data__ field:24 __data__ field:25 __data__ field:26 __data__ field:27 __data__ field:28 __data__ field:29 __data__ field:30 __data__ field:31 __data__ field:32 __data__ field:33 __data__ field:34 __data__ field:35 __data__ field:36 __data__ field:37 __data__ field:38 __data__ field:39 __data__ field:40 __data__ field:41 __data__ field:42 __data__ field:43 __data__ field:44 __data__ field:45 __data__ field:46 __data__ field:47 __data__ field:48 __data__ field:49 __data__ field:50 __data__" --command-key-pattern="P" --key-minimum=1 --key-maximum 1000000 -n 500 -c 50 -t 4 --hide-histogram'
resources:
requests:
memory: 2g
tested-groups:
- hash
tested-commands:
- hgetall
redis-topologies:
- oss-standalone
build-variants:
- gcc:8.5.0-amd64-debian-buster-default
- dockerhub
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: ' --command "HGETALL __key__" --command-key-pattern="R" --key-minimum=1 --key-maximum 100000 --test-time 120 -c 50 -t 4 --hide-histogram'
resources:
requests:
cpus: '4'
memory: 2g

priority: 96
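The check: keyspacelen stanza asserts that the preload fully landed before the benchmark phase runs. A minimal sketch of that verification using redis-py — the helper name is illustrative, not the spec's actual implementation:

import redis

def verify_keyspacelen(conn: redis.Redis, expected: int) -> None:
    # DBSIZE returns the number of keys in the selected database.
    actual = conn.dbsize()
    if actual != expected:
        raise ValueError(f"keyspacelen check failed: expected {expected}, got {actual}")

verify_keyspacelen(redis.Redis(), expected=100000)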