Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upstreaming 2024 Nyrkiö patches #27

Merged
merged 10 commits on Jan 10, 2025
47 changes: 36 additions & 11 deletions hunter/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@ class ComparativeStats:
std_2: float
pvalue: float

def forward_rel_change(self):
def forward_rel_change(self, value_if_nan=0):
"""Relative change from left to right"""
if self.mean_1 == 0:
return value_if_nan

return self.mean_2 / self.mean_1 - 1.0

def backward_rel_change(self):
def backward_rel_change(self, value_if_nan=0):
"""Relative change from right to left"""
if self.mean_2 == 0:
return value_if_nan

return self.mean_1 / self.mean_2 - 1.0

def forward_change_percent(self) -> float:
Expand Down Expand Up @@ -180,7 +186,6 @@ def merge(
:param max_pvalue: maximum accepted pvalue
:param min_magnitude: minimum accepted relative change
"""

tester = TTestSignificanceTester(max_pvalue)
while change_points:

Expand Down Expand Up @@ -214,7 +219,8 @@ def recompute(index: int):
return change_points


def split(series: np.array, window_len: int = 30, max_pvalue: float = 0.001) -> List[ChangePoint]:
def split(series: np.array, window_len: int = 30, max_pvalue: float = 0.001,
new_points=None, old_cp=None) -> List[ChangePoint]:
"""
Finds change points by splitting the series top-down.

Expand All @@ -237,17 +243,36 @@ def split(series: np.array, window_len: int = 30, max_pvalue: float = 0.001) ->
start = 0
step = int(window_len / 2)
indexes = []
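# Incremental mode: new_points is the number N of points that were appended to
# the end of series since the previous run (typically N=1), and old_cp holds the
# weak change points that were found before those points were added.
# Instead of re-scanning the whole series, keep the old change points and run
# E-Divisive only on the tail. The scan restarts at the later of (a) the last old
# change point and (b) the last step boundary, both taken from before
# max_start = len(series) - (new_points + 4 * window_len).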
if new_points is not None and old_cp is not None:
indexes = [c.index for c in old_cp]
steps_needed = new_points/window_len + 4
max_start = len(series) - steps_needed*window_len
for c in old_cp:
if c.index < max_start:
start = c.index
for s in range(0, len(series), step):
if s < max_start and start < s:
start = s

tester = TTestSignificanceTester(max_pvalue)
while start < len(series):
end = min(start + window_len, len(series))
calculator = cext_calculator

algo = EDivisive(seed=None, calculator=calculator, significance_tester=tester)
pts = algo.get_change_points(series[start:end])
new_indexes = [p.index + start for p in pts]
new_indexes.sort()
last_new_change_point_index = next(iter(new_indexes[-1:]), 0)
start = max(last_new_change_point_index, start + step)
indexes += new_indexes
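# In incremental mode the re-scanned tail can rediscover a change point that is
# already present in indexes (carried over from old_cp), so skip duplicates.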
for i in new_indexes:
if i not in indexes:
indexes += [i]

window_endpoints = [0] + indexes + [len(series)]
return [tester.change_point(i, series, window_endpoints) for i in indexes]
Expand All @@ -258,13 +283,13 @@ def compute_change_points_orig(series: np.array, max_pvalue: float = 0.001) -> L
tester = QHatPermutationsSignificanceTester(calculator, pvalue=max_pvalue, permutations=100)
algo = EDivisive(seed=None, calculator=calculator, significance_tester=tester)
pts = algo.get_change_points(series)
indexes = [p.index for p in pts]
window_endpoints = [0] + indexes + [len(series)]
return [tester.change_point(i, series, window_endpoints) for i in indexes]
return pts, None


def compute_change_points(
series: np.array, window_len: int = 50, max_pvalue: float = 0.001, min_magnitude: float = 0.05
series: np.array, window_len: int = 50, max_pvalue: float = 0.001, min_magnitude: float = 0.0,
new_data=None, old_weak_cp=None
) -> List[ChangePoint]:
change_points = split(series, window_len, max_pvalue * 10)
return merge(change_points, series, max_pvalue, min_magnitude)
first_pass_pvalue = max_pvalue * 10 if max_pvalue < 0.05 else (max_pvalue * 2 if max_pvalue < 0.5 else max_pvalue)
weak_change_points = split(series, window_len, first_pass_pvalue, new_points=new_data, old_cp=old_weak_cp)
return merge(weak_change_points, series, max_pvalue, min_magnitude), weak_change_points
2 changes: 1 addition & 1 deletion hunter/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __format_log_annotated(self, test_name: str) -> str:
def __format_json(self, test_name: str) -> str:
import json

return json.dumps({test_name: [cpg.to_json() for cpg in self.__change_points]})
return json.dumps({test_name: [cpg.to_json(rounded=True) for cpg in self.__change_points]})

def __format_regressions_only(self, test_name: str) -> str:
output = []
Expand Down
Loading
Loading