Skip to content

Commit

Permalink
Implements experiment to detect the hot functions of a project (#884)
Browse files Browse the repository at this point in the history
* Implements experiment to detect the hot functions of a project

* Make xray a local import

* Adds missing docs

* Update varats-core/varats/report/hot_functions_report.py

Co-authored-by: Sebastian Böhm <[email protected]>

* Rewords docs

* Fixes type errors

---------

Co-authored-by: Sebastian Böhm <[email protected]>
  • Loading branch information
vulder and boehmseb authored Aug 21, 2024
1 parent 08aa984 commit ef50d4f
Show file tree
Hide file tree
Showing 4 changed files with 382 additions and 0 deletions.
104 changes: 104 additions & 0 deletions varats-core/varats/report/hot_functions_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import typing as tp
from dataclasses import dataclass
from pathlib import Path

from pandas import read_csv

from varats.experiment.workload_util import WorkloadSpecificReportAggregate
from varats.report.report import BaseReport, ReportAggregate


@dataclass
class XRayFunctionWrapper:
    """Plain record for a single function entry of a hot function report."""

    # Name of the function (``function`` column of the report)
    name: str
    # Value of the report's ``count`` column for this function
    count: int
    # Value of the report's ``sum`` column for this function
    sum_time: float


class HotFunctionReport(BaseReport, shorthand="HFR", file_type=".csv"):
    """Report that loads hot function data from a CSV file and offers queries
    to rank and filter the contained functions."""

    MAX_TRACK_FUNCTIONS = 50

    def __init__(self, path: Path) -> None:
        super().__init__(path)
        self.__function_data = read_csv(path)

    def __sort_by_time_spent(self) -> None:
        """Orders the loaded data in place, largest ``sum`` value first."""
        self.__function_data.sort_values(
            by='sum', ascending=False, inplace=True
        )

    @staticmethod
    def __wrap_rows(rows: tp.Any) -> tp.List[XRayFunctionWrapper]:
        """Converts every row of the given data frame into a
        `XRayFunctionWrapper`, keeping the row order."""
        wrapped_functions = []
        for _, entry in rows.iterrows():
            wrapped_functions.append(
                XRayFunctionWrapper(
                    name=entry["function"],
                    count=entry['count'],
                    sum_time=entry["sum"]
                )
            )
        return wrapped_functions

    def top_n_functions(self, limit: int = 10) -> tp.List[XRayFunctionWrapper]:
        """
        Determines the hottest functions, i.e., the functions in which the most
        time was spent.

        Args:
            limit: maximal number of functions to return

        Returns:
            at most `limit` functions, ordered from hottest to coldest
        """
        self.__sort_by_time_spent()
        hottest_rows = self.__function_data.head(limit)
        return self.__wrap_rows(hottest_rows)

    def hot_functions(self, threshold: int = 2) -> tp.List[XRayFunctionWrapper]:
        """
        Selects all functions whose ``sum`` time lies above the given
        percentage of the reference time.

        Args:
            threshold: min percentage a function needs as total
                       time to count as hot

        Returns:
            the hot functions, ordered from hottest to coldest

        Raises:
            ValueError: if `threshold` is not within 0 and 100
        """
        if threshold > 100 or threshold < 0:
            raise ValueError(
                "Threshold value needs to be in the range [0,...,100] "
                f"but was {threshold}"
            )

        self.__sort_by_time_spent()

        # The largest ``sum`` entry serves as reference time. Only the
        # functions present in the report (presumably the top
        # MAX_TRACK_FUNCTIONS ones) are taken into account.
        total_time_tracked = self.__function_data["sum"].max()

        sum_time_cutoff = (
            0 if threshold == 0 else (total_time_tracked * threshold) / 100
        )

        is_hot = self.__function_data["sum"] > sum_time_cutoff
        return self.__wrap_rows(self.__function_data[is_hot])

    def print_full_dump(self) -> None:
        """Prints the complete function data of this report to stdout."""
        print(str(self.__function_data))


class WLHotFunctionAggregate(
    WorkloadSpecificReportAggregate[HotFunctionReport],
    shorthand="WL" + HotFunctionReport.SHORTHAND + ReportAggregate.SHORTHAND,
    file_type=ReportAggregate.FILE_TYPE
):
    """Workload-specific aggregate that bundles `HotFunctionReport`s."""

    def __init__(self, path: Path) -> None:
        super().__init__(path, HotFunctionReport)

    def dump_all_reports(self) -> None:
        """Prints the full content of every loaded hot function report,
        workload by workload."""
        for workload in self.workload_names():
            for hf_report in self.reports(workload):
                hf_report.print_full_dump()

    def hot_functions_per_workload(
        self, threshold: int = 2
    ) -> tp.Dict[str, tp.List[XRayFunctionWrapper]]:
        """
        Computes the hot functions for each workload.

        Args:
            threshold: min percentage a function needs as
                       total time to count as hot

        Returns:
            mapping from workload name to the hot functions of that workload;
            workloads without any report do not get an entry
        """
        hot_funcs_by_workload: tp.Dict[str, tp.List[XRayFunctionWrapper]] = {}

        for workload in self.workload_names():
            # TODO: repetition handling
            # Every report is evaluated, but only the result of the last one
            # is kept for the workload.
            for hf_report in self.reports(workload):
                hot_funcs_by_workload[workload] = hf_report.hot_functions(
                    threshold=threshold
                )

        return hot_funcs_by_workload
181 changes: 181 additions & 0 deletions varats/varats/experiments/vara/hot_function_experiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""Experiment that detects the hot functions of a project."""
import typing as tp
from pathlib import Path

from benchbuild.command import ProjectCommand, cleanup
from benchbuild.extensions import compiler, run, time
from benchbuild.utils import actions
from plumbum import local

from varats.experiment.experiment_util import (
ZippedReportFolder,
create_new_success_result_filepath,
get_default_compile_error_wrapped,
ExperimentHandle,
)
from varats.experiment.workload_util import WorkloadCategory, workload_commands
from varats.experiments.vara.feature_experiment import FeatureExperiment
from varats.experiments.vara.feature_perf_precision import (
select_project_binaries,
)
from varats.project.project_util import BinaryType, ProjectBinaryWrapper
from varats.project.varats_project import VProject
from varats.report.hot_functions_report import (
HotFunctionReport,
WLHotFunctionAggregate,
)
from varats.report.report import ReportSpecification
from varats.utils.config import get_current_config_id


def perf_prec_workload_commands(
    project: VProject, binary: ProjectBinaryWrapper
) -> tp.List[ProjectCommand]:
    """
    Uniformly select the workloads that should be processed.

    Args:
        project: project to select the workload commands for
        binary: binary of the project that the commands should run

    Returns:
        the selected commands, ordered by workload category
        (example, small, medium)
    """
    is_fast_synthetic_cs = (
        project.name.startswith("SynthIP") or
        project.name == "SynthSAFieldSensitivity"
    )

    categories = [WorkloadCategory.SMALL, WorkloadCategory.MEDIUM]
    if not is_fast_synthetic_cs:
        # Example commands from the excluded CS are too "fast"
        categories.insert(0, WorkloadCategory.EXAMPLE)

    selected_commands: tp.List[ProjectCommand] = []
    for category in categories:
        selected_commands.extend(
            workload_commands(project, binary, [category])
        )

    return selected_commands


class RunXRayProfiler(actions.ProjectStep):  # type: ignore
    """Profiling step that runs a XRay instrumented binary to extract function-
    level measurement data."""

    NAME = "RunInstrumentedXRayBinaries"
    # Implicit string concatenation keeps source indentation out of the text.
    DESCRIPTION = (
        "Profile a project that was instrumented "
        "with xray instrumentations."
    )

    project: VProject

    def __init__(
        self, project: VProject, experiment_handle: ExperimentHandle
    ) -> None:
        super().__init__(project=project)
        self.__experiment_handle = experiment_handle

    def __call__(self) -> actions.StepResult:
        return self.run_instrumented_code()

    def __str__(self, indent: int = 0) -> str:
        return str(
            actions.textwrap.indent(
                f"* {self.project.name}: Run VaRA "
                "measurements together with XRay", indent * " "
            )
        )

    @staticmethod
    def __find_xray_log(tracefile_tag: str) -> Path:
        """
        Looks up the XRay trace file of the last run in the current directory.

        Args:
            tracefile_tag: prefix that the name of the trace file starts with

        Returns:
            absolute path to the first file whose name starts with the tag

        Raises:
            FileNotFoundError: if no file with the given prefix exists
        """
        for candidate in Path(".").iterdir():
            if candidate.name.startswith(tracefile_tag):
                return candidate.absolute()

        # Fail loudly instead of continuing with an unbound variable or with
        # the trace file of a previously executed workload.
        raise FileNotFoundError(
            "Could not find a XRay trace file with prefix "
            f"'{tracefile_tag}' in '{Path('.').absolute()}'."
        )

    def run_instrumented_code(self) -> actions.StepResult:
        """
        Run the instrumented code to detect hot functions.

        Every executable binary of the project is run with each selected
        workload command while XRay writes a trace file. Afterwards,
        ``llvm-xray account`` converts the trace into a CSV report that is
        stored inside the zipped result folder of the binary.

        Returns:
            ``StepResult.OK`` after all binaries have been processed

        Raises:
            FileNotFoundError: if a workload run did not produce a trace file
        """
        # pylint: disable=import-outside-toplevel
        from plumbum.cmd import llvm_xray

        for binary in self.project.binaries:
            if binary.type != BinaryType.EXECUTABLE:
                # Skip libraries as we cannot run them
                continue

            with local.cwd(local.path(self.project.builddir)):

                result_filepath = create_new_success_result_filepath(
                    exp_handle=self.__experiment_handle,
                    report_type=self.__experiment_handle.report_spec().
                    main_report,
                    project=self.project,
                    binary=binary,
                    config_id=get_current_config_id(self.project)
                )
                with ZippedReportFolder(
                    result_filepath.full_path()
                ) as reps_tmp_dir:
                    for rep in range(0, 1):
                        for prj_command in perf_prec_workload_commands(
                            project=self.project, binary=binary
                        ):
                            hot_function_report_file = Path(reps_tmp_dir) / (
                                "hot-func-trace_"
                                f"{prj_command.command.label}_{rep}"
                                ".csv"
                            )

                            # Label and repetition make the prefix specific
                            # to this run, so its trace can be found again.
                            unique_tracefile_tag = \
                                f"xray_{prj_command.command.label}_{rep}."
                            with local.env(
                                XRAY_OPTIONS=" ".join([
                                    "patch_premain=true",
                                    "xray_mode=xray-basic",
                                    f"xray_logfile_base={unique_tracefile_tag}"
                                ])
                            ):
                                with cleanup(prj_command):
                                    pb_cmd = prj_command.command.as_plumbum(
                                        project=self.project
                                    )
                                    pb_cmd(retcode=binary.valid_exit_codes)

                            xray_log_path = self.__find_xray_log(
                                unique_tracefile_tag
                            )

                            instr_map_path = local.path(
                                self.project.primary_source
                            ) / binary.path

                            llvm_xray(
                                "account", f"{xray_log_path}",
                                "--deduce-sibling-calls",
                                f"--instr_map={instr_map_path}",
                                f"--output={hot_function_report_file}",
                                "--format=csv",
                                f"--top={HotFunctionReport.MAX_TRACK_FUNCTIONS}"
                            )

        return actions.StepResult.OK


class XRayFindHotFunctions(FeatureExperiment, shorthand="HF"):
    """Experiment for finding hot functions in code."""

    NAME = "DetermineHotFunctions"
    REPORT_SPEC = ReportSpecification(WLHotFunctionAggregate)

    def actions_for_project(
        self, project: VProject
    ) -> tp.MutableSequence[actions.Step]:
        """
        Sets up the project for XRay profiling and returns the steps to run.

        Args:
            project: to profile

        Returns:
            the steps that compile the project, profile it with XRay, and
            clean up afterwards

        Raises:
            AssertionError: if the first selected binary is not an executable
        """
        # Compile with XRay instrumentation, instruction threshold set to 1
        xray_flags = [
            "-fxray-instrument",
            "-fxray-instruction-threshold=1",
        ]
        project.cflags += xray_flags

        timed_runtime = run.RuntimeExtension(project, self) \
            << time.RunWithTime()
        project.runtime_extension = timed_runtime

        project.compiler_extension = compiler.RunCompiler(project, self)

        project.compile = get_default_compile_error_wrapped(
            self.get_handle(), project,
            self.get_handle().report_spec().main_report
        )

        selected_binaries = select_project_binaries(project)
        if selected_binaries[0].type != BinaryType.EXECUTABLE:
            raise AssertionError("Experiment only works with executables.")

        experiment_steps: tp.MutableSequence[actions.Step] = [
            actions.Compile(project),
            RunXRayProfiler(project, self.get_handle()),
            actions.Clean(project),
        ]
        return experiment_steps
96 changes: 96 additions & 0 deletions varats/varats/tables/hot_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Module for the HotFunctionsTable."""
import typing as tp

import pandas as pd

from varats.experiments.vara.hot_function_experiment import XRayFindHotFunctions
from varats.paper.paper_config import get_loaded_paper_config
from varats.paper_mgmt.case_study import get_case_study_file_name_filter
from varats.report.hot_functions_report import WLHotFunctionAggregate
from varats.revision.revisions import get_processed_revisions_files
from varats.table.table import Table
from varats.table.table_utils import dataframe_to_table
from varats.table.tables import TableFormat, TableGenerator


class HotFunctionsTable(Table, table_name="hot_functions"):
    """A concise table that provides a quick overview of all the detected hot
    functions."""

    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
        """
        Renders the hot functions of all case studies in the loaded paper
        config as one table.

        The table contains one row per hot function, workload, and report
        file. Rows are collected across all case studies and report files
        before the data frame is built, so no report overwrites another one.

        Args:
            table_format: output format of the table
            wrap_table: passed on to `dataframe_to_table`

        Returns:
            the rendered table
        """
        columns = [
            "Project", "Binary", "Revision", "Workload", "FunctionName",
            "TimeSpent", "Reps"
        ]
        rows: tp.List[tp.Dict[str, tp.Any]] = []

        for case_study in get_loaded_paper_config().get_all_case_studies():
            project_name = case_study.project_name

            report_files = get_processed_revisions_files(
                project_name, XRayFindHotFunctions, WLHotFunctionAggregate,
                get_case_study_file_name_filter(case_study)
            )

            for report_filepath in report_files:
                agg_hot_functions_report = WLHotFunctionAggregate(
                    report_filepath.full_path()
                )
                report_file = agg_hot_functions_report.filename

                hot_funcs = agg_hot_functions_report.hot_functions_per_workload(
                    threshold=2
                )

                for workload_name in agg_hot_functions_report.workload_names():
                    num_reps = len(
                        agg_hot_functions_report.reports(workload_name)
                    )
                    # Workloads without any report have no entry in the
                    # mapping, so fall back to an empty list for them.
                    for hf in hot_funcs.get(workload_name, []):
                        rows.append({
                            "Project": project_name,
                            "Binary": report_file.binary_name,
                            "Revision": str(report_file.commit_hash),
                            "Workload": workload_name,
                            "FunctionName": hf.name,
                            "TimeSpent": hf.sum_time,
                            "Reps": num_reps
                        })

        # Explicit columns keep sorting and indexing valid for empty results.
        df = pd.DataFrame(rows, columns=columns)

        df.sort_values(["Project", "Binary"], inplace=True)
        df.set_index(
            ["Project", "Binary"],
            inplace=True,
        )

        return dataframe_to_table(
            df, table_format, wrap_table=wrap_table, wrap_landscape=True
        )


class HotFunctionsTableGenerator(
    TableGenerator, generator_name="hot-functions", options=[]
):
    """Table generator that is responsible for the `HotFunctionsTable`."""

    def generate(self) -> tp.List[Table]:
        """Creates one `HotFunctionsTable` from the table config and the table
        kwargs of this generator."""
        hot_functions_table = HotFunctionsTable(
            self.table_config, **self.table_kwargs
        )
        return [hot_functions_table]
1 change: 1 addition & 0 deletions varats/varats/tools/bb_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None:
'varats.experiments.vara.marker_tester',
'varats.experiments.vara.phasar_fta',
'varats.experiments.vara.feature_region_verifier_experiment',
'varats.experiments.vara.hot_function_experiment',
]


Expand Down

0 comments on commit ef50d4f

Please sign in to comment.