Skip to content

Commit

Permalink
Adapt WXray measurement to repeat runs and add significance test
Browse files Browse the repository at this point in the history
  • Loading branch information
vulder committed Jul 8, 2023
1 parent 9bed01e commit db600bf
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 56 deletions.
59 changes: 38 additions & 21 deletions varats/varats/experiments/vara/feature_perf_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ def __init__(self, path: Path) -> None:
super().__init__(path, TimeReportAggregate)


class MPRTEFA(
    MultiPatchReport[TEFReportAggregate], shorthand="MPRTEFA", file_type=".zip"
):
    """Multi-patch report that wraps ``TEFReportAggregate`` results,
    registered under the shorthand "MPRTEFA" for ".zip" report files."""

    def __init__(self, path: Path) -> None:
        # Bind the aggregate TEF report type so the base class handles the
        # contained per-patch reports as TEFReportAggregate.
        super().__init__(path, TEFReportAggregate)

class ReCompile(ProjectStep):
NAME = "RECOMPILE"
DESCRIPTION = "Recompile the project"
Expand Down Expand Up @@ -144,12 +152,14 @@ def __init__(
project: VProject,
binary: ProjectBinaryWrapper,
result_post_fix: str = "",
report_file_ending: str = "json"
report_file_ending: str = "json",
reps=2
):
super().__init__(project=project)
self.__binary = binary
self.__report_file_ending = report_file_ending
self.__result_pre_fix = result_post_fix
self.__reps = reps

def __call__(self, tmp_dir: Path) -> StepResult:
return self.run_traced_code(tmp_dir)
Expand All @@ -162,25 +172,32 @@ def __str__(self, indent: int = 0) -> str:
def run_traced_code(self, tmp_dir: Path) -> StepResult:
    """
    Runs the binary with the embedded tracing code.

    Executes every example workload command of the configured binary
    ``self.__reps`` times, collecting one trace file per repetition inside
    a single zipped report folder.

    Args:
        tmp_dir: step-local temporary directory to place the results in

    Returns:
        StepResult.OK after all repetitions were executed
    """
    with local.cwd(local.path(self.project.builddir)):
        # All repetition measurements end up in one zipped result folder.
        zip_tmp_dir = tmp_dir / f"{self.__result_pre_fix}_rep_measures"
        with ZippedReportFolder(zip_tmp_dir) as reps_tmp_dir:
            for rep in range(self.__reps):
                for prj_command in workload_commands(
                    self.project, self.__binary, [WorkloadCategory.EXAMPLE]
                ):
                    # Disambiguate trace files of different repetitions
                    # via the repetition counter in the filename.
                    local_tracefile_path = Path(reps_tmp_dir) / (
                        f"trace_{prj_command.command.label}_{rep}_"
                        f".{self.__report_file_ending}"
                    )
                    # The tracing runtime picks up the output location
                    # from the VARA_TRACE_FILE environment variable.
                    with local.env(VARA_TRACE_FILE=local_tracefile_path):
                        pb_cmd = prj_command.command.as_plumbum(
                            project=self.project
                        )
                        print(
                            f"Running example {prj_command.command.label}"
                        )

                        extra_options = get_extra_config_options(
                            self.project
                        )
                        with cleanup(prj_command):
                            pb_cmd(
                                *extra_options,
                                retcode=self.__binary.valid_exit_codes
                            )

    return StepResult.OK

Expand All @@ -190,7 +207,7 @@ class TEFProfileRunner(FeatureExperiment, shorthand="TEFp"):

NAME = "RunTEFProfiler"

REPORT_SPEC = ReportSpecification(TEFReport)
REPORT_SPEC = ReportSpecification(MPRTEFA)

def actions_for_project(
self, project: VProject
Expand Down Expand Up @@ -225,7 +242,7 @@ def actions_for_project(

# Add own error handler to compile step.
project.compile = get_default_compile_error_wrapped(
self.get_handle(), project, TEFReport
self.get_handle(), project, self.REPORT_SPEC.main_report
)

binary = project.binaries[0]
Expand Down
67 changes: 32 additions & 35 deletions varats/varats/tables/feature_perf_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import shutil
import tempfile
import typing as tp
from collections import defaultdict
from pathlib import Path

import numpy as np
Expand All @@ -18,7 +19,12 @@
from varats.paper_mgmt.case_study import get_case_study_file_name_filter
from varats.report.gnu_time_report import TimeReportAggregate
from varats.report.report import BaseReport, ReportFilepath
from varats.report.tef_report import TEFReport, TraceEvent, TraceEventType
from varats.report.tef_report import (
TEFReport,
TraceEvent,
TraceEventType,
TEFReportAggregate,
)
from varats.revision.revisions import get_processed_revisions_files
from varats.table.table import Table
from varats.table.table_utils import dataframe_to_table
Expand Down Expand Up @@ -259,46 +265,38 @@ class VXray(Profiler):
"""Profiler mapper implementation for the vara tef tracer."""

def __init__(self) -> None:
    # Map the WXray profiler to the TEF profile runner experiment; results
    # are reported as multi-patch TEF report aggregates (MPRTEFA).
    super().__init__("WXray", fpp.TEFProfileRunner, fpp.MPRTEFA)

def is_regression(self, report_path: ReportFilepath) -> bool:
"""Checks if there was a regression between the old an new data."""
is_regression = False

# with tempfile.TemporaryDirectory() as tmp_result_dir:
# shutil.unpack_archive(
# report_path.full_path(), extract_dir=tmp_result_dir
# )
#
# old_report = None
# new_report = None
# for report in Path(tmp_result_dir).iterdir():
# # print(f"Zipped: {report=}")
# if report.name.endswith("old.json"):
# old_report = load_tef_report(report)
# else:
# new_report = load_tef_report(report)

# if not old_report or not new_report:
# raise AssertionError(
# "Reports where missing in the file {report_path=}"
# )

multi_report = fpp.MultiPatchReport(report_path.full_path(), TEFReport)

old_features = get_feature_performance_from_tef_report(
multi_report.get_old_report()
)
new_features = get_feature_performance_from_tef_report(
multi_report.get_new_report()
multi_report = fpp.MultiPatchReport(
report_path.full_path(), TEFReportAggregate
)

# TODO: correctly implement how to identify a regression
for feature, old_value in old_features.items():
if feature in new_features:
new_value = new_features[feature]
if abs(new_value - old_value) > 10000:
print(f"Found regression for feature {feature}.")
old_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list)
for old_tef_report in multi_report.get_old_report().reports():
pim = get_feature_performance_from_tef_report(old_tef_report)
for feature, value in pim.items():
old_acc_pim[feature].append(value)

new_acc_pim: tp.DefaultDict[str, tp.List[int]] = defaultdict(list)
for new_tef_report in multi_report.get_new_report().reports():
pim = get_feature_performance_from_tef_report(new_tef_report)
for feature, value in pim.items():
new_acc_pim[feature].append(value)

for feature, old_values in old_acc_pim.items():
if feature in new_acc_pim:
new_values = new_acc_pim[feature]
ttest_res = ttest_ind(old_values, new_values)

# TODO: check, maybe we need a "very small value cut off"
if ttest_res.pvalue < 0.05:
print(
f"{self.name} found regression for feature {feature}."
)
is_regression = True
else:
print(f"Could not find feature {feature} in new trace.")
Expand Down Expand Up @@ -335,7 +333,6 @@ def compute_profiler_predictions(

result_dict[config_id] = profiler.is_regression(report_files[0])

print(f"{result_dict=}")
return result_dict


Expand Down

0 comments on commit db600bf

Please sign in to comment.