-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implements experiment to detect the hot functions of a project (#884)
* Implements experiment to detect the hot functions of a project * Make xray a local import * Adds missing docs * Update varats-core/varats/report/hot_functions_report.py Co-authored-by: Sebastian Böhm <[email protected]> * Rewords docs * Fixes type errors --------- Co-authored-by: Sebastian Böhm <[email protected]>
- Loading branch information
Showing
4 changed files
with
382 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import typing as tp | ||
from dataclasses import dataclass | ||
from pathlib import Path | ||
|
||
from pandas import read_csv | ||
|
||
from varats.experiment.workload_util import WorkloadSpecificReportAggregate | ||
from varats.report.report import BaseReport, ReportAggregate | ||
|
||
|
||
@dataclass
class XRayFunctionWrapper:
    """Plain record holding the measurement values of a single function."""

    # Name of the measured function.
    name: str
    # Value of the report's `count` column for this function.
    count: int
    # Value of the report's `sum` column for this function.
    sum_time: float
|
||
|
||
class HotFunctionReport(BaseReport, shorthand="HFR", file_type=".csv"):
    """Report that loads per-function timing data from a CSV file and offers
    queries to find the functions in which the most time was spent."""

    # Upper bound on the number of functions a report is expected to contain.
    MAX_TRACK_FUNCTIONS = 50

    def __init__(self, path: Path) -> None:
        super().__init__(path)
        # Expected columns (as read below): `function`, `count` and `sum`.
        self.__function_data = read_csv(path)

    def _order_by_sum_time(self) -> None:
        """Reorders the loaded data in place, largest `sum` value first."""
        self.__function_data.sort_values(
            by='sum', ascending=False, inplace=True
        )

    @staticmethod
    def _as_wrapper(row: tp.Any) -> XRayFunctionWrapper:
        """Converts one row of the loaded data into a wrapper object."""
        return XRayFunctionWrapper(
            name=row["function"], count=row['count'], sum_time=row["sum"]
        )

    def top_n_functions(self, limit: int = 10) -> tp.List[XRayFunctionWrapper]:
        """
        Determines the `limit` hottest functions, i.e., the functions with the
        largest `sum` values.

        Note: sorts the loaded data in place as a side effect.

        Args:
            limit: maximal number of functions to return

        Returns:
            the hottest functions, ordered from hottest to coldest
        """
        self._order_by_sum_time()
        hottest = self.__function_data.head(limit)
        return [self._as_wrapper(row) for _, row in hottest.iterrows()]

    def hot_functions(self, threshold: int = 2) -> tp.List[XRayFunctionWrapper]:
        """
        Selects all functions whose `sum` value is strictly larger than
        `threshold` percent of the reference time.

        Note: sorts the loaded data in place as a side effect.

        Args:
            threshold: min percentage a function needs as total
                       time to count as hot

        Returns:
            the hot functions, ordered from hottest to coldest

        Raises:
            ValueError: if `threshold` lies outside of [0, 100]
        """
        if threshold > 100 or threshold < 0:
            raise ValueError(
                "Threshold value needs to be in the range [0,...,100] "
                f"but was {threshold}"
            )

        self._order_by_sum_time()

        # The reference time is the largest `sum` entry of the loaded data;
        # only the functions present in the report are considered.
        # NOTE(review): presumably the `sum` values are inclusive times, so
        # the maximum represents the overall tracked time -- confirm.
        total_time_tracked = self.__function_data["sum"].max()

        sum_time_cutoff = (
            0 if threshold == 0 else (total_time_tracked * threshold) / 100
        )

        rows = (row for _, row in self.__function_data.iterrows())
        return [
            self._as_wrapper(row) for row in rows
            if row["sum"] > sum_time_cutoff
        ]

    def print_full_dump(self) -> None:
        """Prints the complete loaded function data to stdout."""
        print(f"{self.__function_data}")
|
||
|
||
class WLHotFunctionAggregate(
    WorkloadSpecificReportAggregate[HotFunctionReport],
    shorthand="WL" + HotFunctionReport.SHORTHAND + ReportAggregate.SHORTHAND,
    file_type=ReportAggregate.FILE_TYPE
):
    """Workload-specific aggregate that bundles multiple
    `HotFunctionReport`s."""

    def __init__(self, path: Path) -> None:
        super().__init__(path, HotFunctionReport)

    def _reports_by_workload(
        self
    ) -> tp.Iterator[tp.Tuple[str, HotFunctionReport]]:
        """Yields every loaded report together with its workload name."""
        for workload in self.workload_names():
            for hf_report in self.reports(workload):
                yield workload, hf_report

    def dump_all_reports(self) -> None:
        """Dumps the contents of all loaded hot functions reports."""
        for _, hf_report in self._reports_by_workload():
            hf_report.print_full_dump()

    def hot_functions_per_workload(
        self, threshold: int = 2
    ) -> tp.Dict[str, tp.List[XRayFunctionWrapper]]:
        """
        Computes the hot functions for every workload.

        Args:
            threshold: min percentage a function needs as
                       total time to count as hot

        Returns:
            mapping from workload name to the hot functions of that workload;
            workloads without reports do not get an entry
        """
        per_workload: tp.Dict[str, tp.List[XRayFunctionWrapper]] = {}
        # TODO: repetition handling -- currently the result of the last
        # report of a workload overwrites the results of earlier ones.
        for workload, hf_report in self._reports_by_workload():
            per_workload[workload] = hf_report.hot_functions(
                threshold=threshold
            )

        return per_workload
181 changes: 181 additions & 0 deletions
181
varats/varats/experiments/vara/hot_function_experiment.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
"""Experiment that detects the hot functions of a project.""" | ||
import typing as tp | ||
from pathlib import Path | ||
|
||
from benchbuild.command import ProjectCommand, cleanup | ||
from benchbuild.extensions import compiler, run, time | ||
from benchbuild.utils import actions | ||
from plumbum import local | ||
|
||
from varats.experiment.experiment_util import ( | ||
ZippedReportFolder, | ||
create_new_success_result_filepath, | ||
get_default_compile_error_wrapped, | ||
ExperimentHandle, | ||
) | ||
from varats.experiment.workload_util import WorkloadCategory, workload_commands | ||
from varats.experiments.vara.feature_experiment import FeatureExperiment | ||
from varats.experiments.vara.feature_perf_precision import ( | ||
select_project_binaries, | ||
) | ||
from varats.project.project_util import BinaryType, ProjectBinaryWrapper | ||
from varats.project.varats_project import VProject | ||
from varats.report.hot_functions_report import ( | ||
HotFunctionReport, | ||
WLHotFunctionAggregate, | ||
) | ||
from varats.report.report import ReportSpecification | ||
from varats.utils.config import get_current_config_id | ||
|
||
|
||
def perf_prec_workload_commands(
    project: VProject, binary: ProjectBinaryWrapper
) -> tp.List[ProjectCommand]:
    """
    Uniformly select the workloads that should be processed.

    Args:
        project: project to select the workloads for
        binary: binary the workloads should be run with

    Returns:
        the commands of the selected workload categories, in the order
        example (if applicable), small, medium
    """
    skip_examples = (
        project.name.startswith("SynthIP") or
        project.name == "SynthSAFieldSensitivity"
    )

    categories = []
    if not skip_examples:
        # Example commands of the skipped case studies are too "fast"
        categories.append(WorkloadCategory.EXAMPLE)
    categories.append(WorkloadCategory.SMALL)
    categories.append(WorkloadCategory.MEDIUM)

    selected_commands = []
    for category in categories:
        selected_commands += workload_commands(project, binary, [category])

    return selected_commands
|
||
|
||
class RunXRayProfiler(actions.ProjectStep):  # type: ignore
    """Profiling step that runs a XRay instrumented binary to extract function-
    level measurement data."""

    NAME = "RunInstrumentedXRayBinaries"
    # Implicit string concatenation instead of a backslash continuation, so
    # that no indentation whitespace ends up inside of the description.
    DESCRIPTION = (
        "Profile a project that was instrumented "
        "with xray instrumentations."
    )

    project: VProject

    def __init__(
        self, project: VProject, experiment_handle: ExperimentHandle
    ) -> None:
        super().__init__(project=project)
        self.__experiment_handle = experiment_handle

    def __call__(self) -> actions.StepResult:
        return self.run_instrumented_code()

    def __str__(self, indent: int = 0) -> str:
        return str(
            actions.textwrap.indent(
                f"* {self.project.name}: Run VaRA "
                "measurements together with XRay", indent * " "
            )
        )

    @staticmethod
    def _find_xray_log(tracefile_tag: str) -> Path:
        """
        Looks up the XRay trace file that was written for `tracefile_tag` in
        the current working directory.

        Args:
            tracefile_tag: file name prefix that was passed to XRay as
                           `xray_logfile_base`

        Returns:
            absolute path to the first file whose name starts with the tag

        Raises:
            FileNotFoundError: if no trace file with the given tag exists
        """
        xray_log = next((
            candidate for candidate in Path(".").iterdir()
            if candidate.name.startswith(tracefile_tag)
        ), None)
        if xray_log is None:
            raise FileNotFoundError(
                f"No XRay trace file with prefix '{tracefile_tag}' found in "
                f"{Path('.').absolute()}"
            )

        return xray_log.absolute()

    def run_instrumented_code(self) -> actions.StepResult:
        """
        Run the instrumented code to detect hot functions.

        For every executable binary, each selected workload is run with XRay
        enabled and the resulting trace is converted with `llvm-xray account`
        into a CSV file that is stored in the zipped result folder.

        Returns:
            `StepResult.OK` after all binaries have been processed

        Raises:
            FileNotFoundError: if a workload run did not produce a trace file
        """
        # pylint: disable=import-outside-toplevel
        from plumbum.cmd import llvm_xray

        for binary in self.project.binaries:
            if binary.type != BinaryType.EXECUTABLE:
                # Skip libraries as we cannot run them
                continue

            with local.cwd(local.path(self.project.builddir)):

                result_filepath = create_new_success_result_filepath(
                    exp_handle=self.__experiment_handle,
                    report_type=self.__experiment_handle.report_spec().
                    main_report,
                    project=self.project,
                    binary=binary,
                    config_id=get_current_config_id(self.project)
                )
                with ZippedReportFolder(
                    result_filepath.full_path()
                ) as reps_tmp_dir:
                    # Only a single repetition is measured at the moment.
                    for rep in range(0, 1):
                        for prj_command in perf_prec_workload_commands(
                            project=self.project, binary=binary
                        ):
                            hot_function_report_file = Path(reps_tmp_dir) / (
                                "hot-func-trace_"
                                f"{prj_command.command.label}_{rep}"
                                ".csv"
                            )

                            unique_tracefile_tag = \
                                f"xray_{prj_command.command.label}_{rep}."
                            with local.env(
                                XRAY_OPTIONS=" ".join([
                                    "patch_premain=true",
                                    "xray_mode=xray-basic",
                                    f"xray_logfile_base={unique_tracefile_tag}"
                                ])
                            ):
                                with cleanup(prj_command):
                                    pb_cmd = prj_command.command.as_plumbum(
                                        project=self.project
                                    )
                                    pb_cmd(retcode=binary.valid_exit_codes)

                            # Fail loudly when no trace was written instead of
                            # reusing the trace file of a previous workload.
                            xray_log_path = self._find_xray_log(
                                unique_tracefile_tag
                            )

                            instr_map_path = local.path(
                                self.project.primary_source
                            ) / binary.path

                            llvm_xray(
                                "account", f"{xray_log_path}",
                                "--deduce-sibling-calls",
                                f"--instr_map={instr_map_path}",
                                f"--output={hot_function_report_file}",
                                "--format=csv",
                                f"--top={HotFunctionReport.MAX_TRACK_FUNCTIONS}"
                            )

        return actions.StepResult.OK
|
||
|
||
class XRayFindHotFunctions(FeatureExperiment, shorthand="HF"):
    """Experiment for finding hot functions in code."""

    NAME = "DetermineHotFunctions"
    REPORT_SPEC = ReportSpecification(WLHotFunctionAggregate)

    def actions_for_project(
        self, project: VProject
    ) -> tp.MutableSequence[actions.Step]:
        """
        Sets up the XRay instrumented compilation of the project and returns
        the steps that build, profile, and clean it.

        Args:
            project: to analyze

        Returns:
            the list of steps to execute for the project

        Raises:
            AssertionError: if the selected binary is not an executable
        """
        # Instrument every function, independent of its size.
        xray_cflags = ["-fxray-instrument", "-fxray-instruction-threshold=1"]
        project.cflags += xray_cflags

        runtime_ext = run.RuntimeExtension(project, self)
        project.runtime_extension = runtime_ext << time.RunWithTime()

        project.compiler_extension = compiler.RunCompiler(project, self)

        main_report_type = self.get_handle().report_spec().main_report
        project.compile = get_default_compile_error_wrapped(
            self.get_handle(), project, main_report_type
        )

        selected_binary = select_project_binaries(project)[0]
        if selected_binary.type != BinaryType.EXECUTABLE:
            raise AssertionError("Experiment only works with executables.")

        steps: tp.MutableSequence[actions.Step] = []
        steps.append(actions.Compile(project))
        steps.append(RunXRayProfiler(project, self.get_handle()))
        steps.append(actions.Clean(project))
        return steps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
"""Module for the HotFunctionsTable.""" | ||
import typing as tp | ||
|
||
import pandas as pd | ||
|
||
from varats.experiments.vara.hot_function_experiment import XRayFindHotFunctions | ||
from varats.paper.paper_config import get_loaded_paper_config | ||
from varats.paper_mgmt.case_study import get_case_study_file_name_filter | ||
from varats.report.hot_functions_report import WLHotFunctionAggregate | ||
from varats.revision.revisions import get_processed_revisions_files | ||
from varats.table.table import Table | ||
from varats.table.table_utils import dataframe_to_table | ||
from varats.table.tables import TableFormat, TableGenerator | ||
|
||
|
||
class HotFunctionsTable(Table, table_name="hot_functions"):
    """A concise table that provides a quick overview of all the detected hot
    functions."""

    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Builds the table with one row per hot function, covering all reports
        of all case studies in the loaded paper config.

        Args:
            table_format: output format of the table
            wrap_table: whether the table should be wrapped

        Returns:
            the rendered table
        """
        case_studies = get_loaded_paper_config().get_all_case_studies()

        columns = [
            "Project", "Binary", "Revision", "Workload", "FunctionName",
            "TimeSpent", "Reps"
        ]
        # Rows are collected across *all* case studies and reports, so that
        # the data of one report does not replace the data of another one.
        rows: tp.List[tp.Dict[str, tp.Any]] = []

        for case_study in case_studies:
            project_name = case_study.project_name

            report_files = get_processed_revisions_files(
                project_name, XRayFindHotFunctions, WLHotFunctionAggregate,
                get_case_study_file_name_filter(case_study)
            )

            for report_filepath in report_files:
                agg_hot_functions_report = WLHotFunctionAggregate(
                    report_filepath.full_path()
                )
                report_file = agg_hot_functions_report.filename

                hot_funcs = agg_hot_functions_report.hot_functions_per_workload(
                    threshold=2
                )

                for workload_name in agg_hot_functions_report.workload_names():
                    num_reps = len(
                        agg_hot_functions_report.reports(workload_name)
                    )
                    # Workloads without reports have no hot function entry.
                    for hf in hot_funcs.get(workload_name, []):
                        rows.append({
                            "Project": project_name,
                            "Binary": report_file.binary_name,
                            "Revision": str(report_file.commit_hash),
                            "Workload": workload_name,
                            "FunctionName": hf.name,
                            "TimeSpent": hf.sum_time,
                            "Reps": num_reps
                        })

        # Explicit columns keep sorting/indexing valid for an empty table.
        df = pd.DataFrame(rows, columns=columns)

        df.sort_values(["Project", "Binary"], inplace=True)
        df.set_index(
            ["Project", "Binary"],
            inplace=True,
        )

        return dataframe_to_table(
            df, table_format, wrap_table=wrap_table, wrap_landscape=True
        )
|
||
|
||
class HotFunctionsTableGenerator(
    TableGenerator, generator_name="hot-functions", options=[]
):
    """Generator for `HotFunctionsTable`."""

    def generate(self) -> tp.List[Table]:
        """Creates the single `HotFunctionsTable` for the current table
        config."""
        hot_functions_table = HotFunctionsTable(
            self.table_config, **self.table_kwargs
        )
        return [hot_functions_table]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters