From ef50d4fd4d5b4d10f83d034c0f9d69478df6a091 Mon Sep 17 00:00:00 2001
From: Florian Sattler
Date: Wed, 21 Aug 2024 11:03:35 +0200
Subject: [PATCH] Implements experiment to detect the hot functions of a project (#884)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Implements experiment to detect the hot functions of a project

* Make xray a local import

* Adds missing docs

* Update varats-core/varats/report/hot_functions_report.py

Co-authored-by: Sebastian Böhm

* Rewords docs

* Fixes type errors

---------

Co-authored-by: Sebastian Böhm
---
 .../varats/report/hot_functions_report.py     | 133 +++++++++++++
 .../vara/hot_function_experiment.py           | 242 ++++++++++++++++++++++++
 varats/varats/tables/hot_functions.py         |  96 ++++++++++
 varats/varats/tools/bb_config.py              |   1 +
 4 files changed, 472 insertions(+)
 create mode 100644 varats-core/varats/report/hot_functions_report.py
 create mode 100644 varats/varats/experiments/vara/hot_function_experiment.py
 create mode 100644 varats/varats/tables/hot_functions.py

diff --git a/varats-core/varats/report/hot_functions_report.py b/varats-core/varats/report/hot_functions_report.py
new file mode 100644
index 000000000..8b6aae2d1
--- /dev/null
+++ b/varats-core/varats/report/hot_functions_report.py
@@ -0,0 +1,133 @@
+"""Reports that hold the hot functions of a project."""
+import typing as tp
+from dataclasses import dataclass
+from pathlib import Path
+
+from pandas import read_csv
+
+from varats.experiment.workload_util import WorkloadSpecificReportAggregate
+from varats.report.report import BaseReport, ReportAggregate
+
+
+@dataclass
+class XRayFunctionWrapper:
+    """Measurement data of a single function, taken from one report row."""
+
+    name: str  # value of the `function` column
+    count: int  # value of the `count` column
+    sum_time: float  # value of the `sum` column
+
+
+class HotFunctionReport(BaseReport, shorthand="HFR", file_type=".csv"):
+    """Report class to load and evaluate the hot function data."""
+
+    MAX_TRACK_FUNCTIONS = 50
+
+    def __init__(self, path: Path) -> None:
+        super().__init__(path)
+        self.__function_data = read_csv(path)
+
+    def top_n_functions(self, limit: int = 10) -> tp.List[XRayFunctionWrapper]:
+        """
+        Determines the `limit` hottest functions, i.e., the functions in which
+        the most time was spent.
+
+        Args:
+            limit: maximal number of functions to return
+
+        Returns:
+            the hottest functions, ordered by descending `sum` time
+        """
+        self.__function_data.sort_values(
+            by='sum', ascending=False, inplace=True
+        )
+        return [
+            XRayFunctionWrapper(
+                name=row["function"], count=row['count'], sum_time=row["sum"]
+            ) for _, row in self.__function_data.head(limit).iterrows()
+        ]
+
+    def hot_functions(self, threshold: int = 2) -> tp.List[XRayFunctionWrapper]:
+        """
+        Determines all functions whose `sum` time exceeds the threshold.
+
+        Args:
+            threshold: min percentage a function needs as total
+                       time to count as hot
+
+        Returns:
+            the hot functions, ordered by descending `sum` time
+
+        Raises:
+            ValueError: if `threshold` is not in the range [0,...,100]
+        """
+        if threshold < 0 or threshold > 100:
+            raise ValueError(
+                "Threshold value needs to be in the range [0,...,100] "
+                f"but was {threshold}"
+            )
+
+        self.__function_data.sort_values(
+            by='sum', ascending=False, inplace=True
+        )
+        # The total time tracked only includes time spent in the top n
+        # (MAX_TRACK_FUNCTIONS) functions
+        # NOTE(review): the reference value is the largest `sum` of a single
+        # function, not the sum over all functions -- confirm this is intended.
+        total_time_tracked = self.__function_data["sum"].max()
+
+        if threshold == 0:
+            sum_time_cutoff = 0
+        else:
+            sum_time_cutoff = (total_time_tracked * threshold) / 100
+
+        return [
+            XRayFunctionWrapper(
+                name=row["function"], count=row['count'], sum_time=row["sum"]
+            )
+            for _, row in self.__function_data.iterrows()
+            if row["sum"] > sum_time_cutoff
+        ]
+
+    def print_full_dump(self) -> None:
+        """Prints the complete function data to stdout."""
+        print(f"{self.__function_data}")
+
+
+class WLHotFunctionAggregate(
+    WorkloadSpecificReportAggregate[HotFunctionReport],
+    shorthand="WL" + HotFunctionReport.SHORTHAND + ReportAggregate.SHORTHAND,
+    file_type=ReportAggregate.FILE_TYPE
+):
+    """Workload-specific aggregate of hot function reports."""
+
+    def __init__(self, path: Path) -> None:
+        super().__init__(path, HotFunctionReport)
+
+    def dump_all_reports(self) -> None:
+        """Dumps the contents of all loaded hot functions reports."""
+        for wl_name in self.workload_names():
+            for report in self.reports(wl_name):
+                report.print_full_dump()
+
+    def hot_functions_per_workload(
+        self, threshold: int = 2
+    ) -> tp.Dict[str, tp.List[XRayFunctionWrapper]]:
+        """
+        Determines the hot functions of every workload.
+
+        Args:
+            threshold: min percentage a function needs as
+                       total time to count as hot
+
+        Returns:
+            a mapping from workload name to the hot functions of the workload
+        """
+        res: tp.Dict[str, tp.List[XRayFunctionWrapper]] = {}
+        for wl_name in self.workload_names():
+            # TODO: repetition handling
+            # Until then, the last report of a workload overwrites the others.
+            for report in self.reports(wl_name):
+                res[wl_name] = report.hot_functions(threshold=threshold)
+
+        return res
diff --git a/varats/varats/experiments/vara/hot_function_experiment.py b/varats/varats/experiments/vara/hot_function_experiment.py
new file mode 100644
index 000000000..e17cee9d0
--- /dev/null
+++ b/varats/varats/experiments/vara/hot_function_experiment.py
@@ -0,0 +1,242 @@
+"""Experiment that detects the hot functions of a project."""
+import typing as tp
+from pathlib import Path
+
+from benchbuild.command import ProjectCommand, cleanup
+from benchbuild.extensions import compiler, run, time
+from benchbuild.utils import actions
+from plumbum import local
+
+from varats.experiment.experiment_util import (
+    ZippedReportFolder,
+    create_new_success_result_filepath,
+    get_default_compile_error_wrapped,
+    ExperimentHandle,
+)
+from varats.experiment.workload_util import WorkloadCategory, workload_commands
+from varats.experiments.vara.feature_experiment import FeatureExperiment
+from varats.experiments.vara.feature_perf_precision import (
+    select_project_binaries,
+)
+from varats.project.project_util import BinaryType, ProjectBinaryWrapper
+from varats.project.varats_project import VProject
+from varats.report.hot_functions_report import (
+    HotFunctionReport,
+    WLHotFunctionAggregate,
+)
+from varats.report.report import ReportSpecification
+from varats.utils.config import get_current_config_id
+
+
+def perf_prec_workload_commands(
+    project: VProject, binary: ProjectBinaryWrapper
+) -> tp.List[ProjectCommand]:
+    """
+    Uniformly select the workloads that should be processed.
+
+    Args:
+        project: to select the workload commands for
+        binary: to select the workload commands for
+
+    Returns:
+        the example (unless skipped for the project), small, and medium
+        workload commands, in this order
+    """
+    wl_commands: tp.List[ProjectCommand] = []
+
+    if not project.name.startswith(
+        "SynthIP"
+    ) and project.name != "SynthSAFieldSensitivity":
+        # Example commands from these CS are too "fast"
+        wl_commands += workload_commands(
+            project, binary, [WorkloadCategory.EXAMPLE]
+        )
+
+    wl_commands += workload_commands(project, binary, [WorkloadCategory.SMALL])
+
+    wl_commands += workload_commands(project, binary, [WorkloadCategory.MEDIUM])
+
+    return wl_commands
+
+
+def _find_xray_trace_file(search_dir: Path, tracefile_tag: str) -> Path:
+    """
+    Find the XRay trace file whose name starts with the given tag.
+
+    Args:
+        search_dir: directory that is searched (non-recursively)
+        tracefile_tag: prefix of the trace file name
+
+    Returns:
+        the absolute path of the first matching file
+
+    Raises:
+        FileNotFoundError: if no file in `search_dir` matches the tag
+    """
+    for candidate in search_dir.iterdir():
+        if candidate.name.startswith(tracefile_tag):
+            return candidate.absolute()
+
+    raise FileNotFoundError(
+        f"No XRay trace file with prefix '{tracefile_tag}' found in "
+        f"'{search_dir.absolute()}'."
+    )
+
+
+class RunXRayProfiler(actions.ProjectStep):  # type: ignore
+    """Profiling step that runs a XRay instrumented binary to extract function-
+    level measurement data."""
+
+    NAME = "RunInstrumentedXRayBinaries"
+    # Adjacent literals instead of a backslash continuation inside the string;
+    # the latter embeds the indentation of the second line in the description.
+    DESCRIPTION = (
+        "Profile a project that was instrumented "
+        "with xray instrumentations."
+    )
+
+    project: VProject
+
+    def __init__(
+        self, project: VProject, experiment_handle: ExperimentHandle
+    ) -> None:
+        super().__init__(project=project)
+        self.__experiment_handle = experiment_handle
+
+    def __call__(self) -> actions.StepResult:
+        return self.run_instrumented_code()
+
+    def __str__(self, indent: int = 0) -> str:
+        return str(
+            actions.textwrap.indent(
+                f"* {self.project.name}: Run VaRA "
+                "measurements together with XRay", indent * " "
+            )
+        )
+
+    def run_instrumented_code(self) -> actions.StepResult:
+        """
+        Run the instrumented code to detect hot functions.
+
+        Every workload command of every executable binary is run with the
+        `XRAY_OPTIONS` environment set. The trace of each run is converted with
+        `llvm_xray account` into a CSV file inside the zipped result folder.
+
+        Returns:
+            `StepResult.OK` after all binaries were processed
+
+        Raises:
+            FileNotFoundError: if a run did not leave a XRay trace file
+        """
+        # pylint: disable=import-outside-toplevel
+        from plumbum.cmd import llvm_xray
+
+        for binary in self.project.binaries:
+            if binary.type != BinaryType.EXECUTABLE:
+                # Skip libraries as we cannot run them
+                continue
+
+            with local.cwd(local.path(self.project.builddir)):
+
+                result_filepath = create_new_success_result_filepath(
+                    exp_handle=self.__experiment_handle,
+                    report_type=self.__experiment_handle.report_spec().
+                    main_report,
+                    project=self.project,
+                    binary=binary,
+                    config_id=get_current_config_id(self.project)
+                )
+                with ZippedReportFolder(
+                    result_filepath.full_path()
+                ) as reps_tmp_dir:
+                    for rep in range(0, 1):
+                        for prj_command in perf_prec_workload_commands(
+                            project=self.project, binary=binary
+                        ):
+                            hot_function_report_file = Path(reps_tmp_dir) / (
+                                "hot-func-trace_"
+                                f"{prj_command.command.label}_{rep}"
+                                ".csv"
+                            )
+
+                            unique_tracefile_tag = \
+                                f"xray_{prj_command.command.label}_{rep}."
+                            with local.env(
+                                XRAY_OPTIONS=" ".join([
+                                    "patch_premain=true",
+                                    "xray_mode=xray-basic",
+                                    f"xray_logfile_base={unique_tracefile_tag}"
+                                ])
+                            ):
+                                with cleanup(prj_command):
+                                    pb_cmd = prj_command.command.as_plumbum(
+                                        project=self.project
+                                    )
+                                    pb_cmd(retcode=binary.valid_exit_codes)
+
+                            # Looked up anew for every run, so that a missing
+                            # trace is reported instead of silently reusing
+                            # the trace of a previous workload command.
+                            xray_log_path = _find_xray_trace_file(
+                                Path("."), unique_tracefile_tag
+                            )
+
+                            instr_map_path = local.path(
+                                self.project.primary_source
+                            ) / binary.path
+
+                            llvm_xray(
+                                "account", f"{xray_log_path}",
+                                "--deduce-sibling-calls",
+                                f"--instr_map={instr_map_path}",
+                                f"--output={hot_function_report_file}",
+                                "--format=csv",
+                                f"--top={HotFunctionReport.MAX_TRACK_FUNCTIONS}"
+                            )
+
+        return actions.StepResult.OK
+
+
+class XRayFindHotFunctions(FeatureExperiment, shorthand="HF"):
+    """Experiment for finding hot functions in code."""
+
+    NAME = "DetermineHotFunctions"
+    REPORT_SPEC = ReportSpecification(WLHotFunctionAggregate)
+
+    def actions_for_project(
+        self, project: VProject
+    ) -> tp.MutableSequence[actions.Step]:
+        """
+        Configures the project for XRay instrumentation and returns the steps
+        to compile, profile, and clean it.
+
+        Args:
+            project: to analyze
+
+        Raises:
+            AssertionError: if the first selected binary is no executable
+        """
+        project.cflags += [
+            "-fxray-instrument",
+            "-fxray-instruction-threshold=1",
+        ]
+
+        project.runtime_extension = run.RuntimeExtension(project, self) \
+            << time.RunWithTime()
+
+        project.compiler_extension = compiler.RunCompiler(project, self)
+
+        project.compile = get_default_compile_error_wrapped(
+            self.get_handle(), project,
+            self.get_handle().report_spec().main_report
+        )
+
+        binary = select_project_binaries(project)[0]
+        if binary.type != BinaryType.EXECUTABLE:
+            raise AssertionError("Experiment only works with executables.")
+
+        return [
+            actions.Compile(project),
+            RunXRayProfiler(project, self.get_handle()),
+            actions.Clean(project),
+        ]
diff --git a/varats/varats/tables/hot_functions.py b/varats/varats/tables/hot_functions.py
new file mode 100644
index 000000000..85ec2da17
--- /dev/null
+++ b/varats/varats/tables/hot_functions.py
@@ -0,0 +1,96 @@
+"""Module for the HotFunctionsTable."""
+import typing as tp
+
+import pandas as pd
+
+from varats.experiments.vara.hot_function_experiment import XRayFindHotFunctions
+from varats.paper.paper_config import get_loaded_paper_config
+from varats.paper_mgmt.case_study import get_case_study_file_name_filter
+from varats.report.hot_functions_report import WLHotFunctionAggregate
+from varats.revision.revisions import get_processed_revisions_files
+from varats.table.table import Table
+from varats.table.table_utils import dataframe_to_table
+from varats.table.tables import TableFormat, TableGenerator
+
+
+class HotFunctionsTable(Table, table_name="hot_functions"):
+    """A concise table that provides a quick overview of all the detected hot
+    functions."""
+
+    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
+        """
+        Tabulates the hot functions of all case studies of the loaded paper
+        config, one row per report file, workload, and hot function.
+
+        Args:
+            table_format: forwarded to `dataframe_to_table`
+            wrap_table: forwarded to `dataframe_to_table`
+
+        Returns:
+            the table as rendered by `dataframe_to_table`
+        """
+        columns = [
+            "Project", "Binary", "Revision", "Workload", "FunctionName",
+            "TimeSpent", "Reps"
+        ]
+        # Rows are gathered over all case studies and report files and only
+        # then turned into a data frame, so that no report overwrites the rows
+        # of another one and an empty selection still yields a valid frame.
+        rows: tp.List[tp.Dict[str, tp.Any]] = []
+
+        for case_study in get_loaded_paper_config().get_all_case_studies():
+            project_name = case_study.project_name
+
+            report_files = get_processed_revisions_files(
+                project_name, XRayFindHotFunctions, WLHotFunctionAggregate,
+                get_case_study_file_name_filter(case_study)
+            )
+
+            for report_filepath in report_files:
+                agg_report = WLHotFunctionAggregate(
+                    report_filepath.full_path()
+                )
+                report_file = agg_report.filename
+                hot_funcs = agg_report.hot_functions_per_workload(threshold=2)
+
+                for workload_name in agg_report.workload_names():
+                    num_reps = len(agg_report.reports(workload_name))
+                    for hot_func in hot_funcs.get(workload_name, []):
+                        rows.append({
+                            "Project": project_name,
+                            "Binary": report_file.binary_name,
+                            "Revision": str(report_file.commit_hash),
+                            "Workload": workload_name,
+                            "FunctionName": hot_func.name,
+                            "TimeSpent": hot_func.sum_time,
+                            "Reps": num_reps,
+                        })
+
+        # Explicit columns keep sorting and indexing valid without any rows.
+        df = pd.DataFrame(rows, columns=columns)
+        df.sort_values(["Project", "Binary"], inplace=True)
+        df.set_index(
+            ["Project", "Binary"],
+            inplace=True,
+        )
+
+        # No additional rendering options are passed at the moment.
+        kwargs: tp.Dict[str, tp.Any] = {}
+
+        return dataframe_to_table(
+            df,
+            table_format,
+            wrap_table=wrap_table,
+            wrap_landscape=True,
+            **kwargs
+        )
+
+
+class HotFunctionsTableGenerator(
+    TableGenerator, generator_name="hot-functions", options=[]
+):
+    """Generator for `HotFunctionsTable`."""
+
+    def generate(self) -> tp.List[Table]:
+        """Creates the `HotFunctionsTable` for the current table config."""
+        return [HotFunctionsTable(self.table_config, **self.table_kwargs)]
diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py
index 3bca864d0..3b3ecff37 100644
--- a/varats/varats/tools/bb_config.py
+++ b/varats/varats/tools/bb_config.py
@@ -123,6 +123,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None:
         'varats.experiments.vara.marker_tester',
         'varats.experiments.vara.phasar_fta',
         'varats.experiments.vara.feature_region_verifier_experiment',
+        'varats.experiments.vara.hot_function_experiment',
     ]