next_iter
elephaint committed Nov 7, 2024
1 parent c1da332 commit c728749
Showing 10 changed files with 1,396 additions and 3,885 deletions.
7 changes: 7 additions & 0 deletions hierarchicalforecast/_modidx.py
@@ -9,6 +9,8 @@
'hierarchicalforecast/core.py'),
'hierarchicalforecast.core.HierarchicalReconciliation.__init__': ( 'src/core.html#hierarchicalreconciliation.__init__',
'hierarchicalforecast/core.py'),
'hierarchicalforecast.core.HierarchicalReconciliation._prepare_Y': ( 'src/core.html#hierarchicalreconciliation._prepare_y',
'hierarchicalforecast/core.py'),
'hierarchicalforecast.core.HierarchicalReconciliation._prepare_fit': ( 'src/core.html#hierarchicalreconciliation._prepare_fit',
'hierarchicalforecast/core.py'),
'hierarchicalforecast.core.HierarchicalReconciliation.bootstrap_reconcile': ( 'src/core.html#hierarchicalreconciliation.bootstrap_reconcile',
@@ -202,12 +204,17 @@
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.concat_str': ( 'src/utils.html#concat_str',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.cov2corr': ( 'src/utils.html#cov2corr',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.df_constructor': ( 'src/utils.html#df_constructor',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.group_by_agg_named': ( 'src/utils.html#group_by_agg_named',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.is_strictly_hierarchical': ( 'src/utils.html#is_strictly_hierarchical',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.level_to_outputs': ( 'src/utils.html#level_to_outputs',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.pivot': ('src/utils.html#pivot', 'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.quantiles_to_outputs': ( 'src/utils.html#quantiles_to_outputs',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.samples_to_quantiles_df': ( 'src/utils.html#samples_to_quantiles_df',
266 changes: 161 additions & 105 deletions hierarchicalforecast/core.py

Large diffs are not rendered by default.

83 changes: 60 additions & 23 deletions hierarchicalforecast/evaluation.py
@@ -8,7 +8,10 @@
from typing import Callable, Dict, List, Optional, Union

import numpy as np
import pandas as pd
import utilsforecast.processing as ufp

from .utils import pivot, df_constructor
from utilsforecast.compat import DFType
from scipy.stats import multivariate_normal

# %% ../nbs/src/evaluation.ipynb 6
@@ -338,55 +341,89 @@ def __init__(self,
self.evaluators = evaluators

def evaluate(self,
Y_hat_df: pd.DataFrame,
Y_test_df: pd.DataFrame,
Y_hat_df: DFType,
Y_test_df: DFType,
tags: Dict[str, np.ndarray],
Y_df: Optional[pd.DataFrame] = None,
benchmark: Optional[str] = None):
Y_df: Optional[DFType] = None,
benchmark: Optional[str] = None,
id_col: str = "unique_id",
time_col: str = "ds",
target_col: str = "y",
):
"""Hierarchical Evaluation Method.
**Parameters:**<br>
`Y_hat_df`: pd.DataFrame, Forecasts indexed by `'unique_id'` with column `'ds'` and models to evaluate.<br>
`Y_test_df`: pd.DataFrame, True values with columns `['ds', 'y']`.<br>
`Y_hat_df`: DataFrame, Forecasts with columns `['unique_id', 'ds']` and a column per model to evaluate.<br>
`Y_test_df`: DataFrame, True values with columns `['unique_id', 'ds', 'y']`.<br>
`tags`: Dict[str, np.ndarray], each str key is a level and its value contains the tags associated to that level.<br>
`Y_df`: pd.DataFrame, Training set of base time series with columns `['ds', 'y']` indexed by `unique_id`.<br>
`Y_df`: DataFrame, Training set of base time series with columns `['unique_id', 'ds', 'y']`.<br>
`benchmark`: str, If passed, evaluators are scaled by the error of this benchmark.<br>
`id_col` : str='unique_id', column that identifies each series.<br>
`time_col` : str='ds', column that identifies each timestep, its values can be timestamps or integers.<br>
`target_col` : str='y', column that contains the target.
**Returns:**<br>
`evaluation`: pd.DataFrame with accuracy measurements across hierarchical levels.
`evaluation`: DataFrame with accuracy measurements across hierarchical levels.
"""
drop_cols = ['ds', 'y'] if 'y' in Y_hat_df.columns else ['ds']
h = len(Y_hat_df.loc[[Y_hat_df.index[0]]])
model_names = Y_hat_df.drop(columns=drop_cols, axis=1).columns.to_list()
n_series = len(set(Y_hat_df[id_col]))
h = len(set(Y_hat_df[time_col]))
if len(Y_hat_df) != n_series * h:
raise Exception('Y_hat_df should have a forecast for each series and horizon')

fn_names = [fn.__name__ for fn in self.evaluators]
has_y_insample = any(['y_insample' in signature(fn).parameters for fn in self.evaluators])
if has_y_insample and Y_df is None:
raise Exception('At least one evaluator needs y insample, please pass `Y_df`')
raise Exception('At least one evaluator needs y_insample, please pass `Y_df`')

if benchmark is not None:
fn_names = [f'{fn_name}-scaled' for fn_name in fn_names]

tags_ = {'Overall': np.concatenate(list(tags.values()))}
tags_ = {**tags_, **tags}
index = pd.MultiIndex.from_product([tags_.keys(), fn_names], names=['level', 'metric'])
evaluation = pd.DataFrame(columns=model_names, index=index)
for level, cats in tags_.items():
Y_h_cats = Y_hat_df.loc[cats]
y_test_cats = Y_test_df.loc[cats, 'y'].values.reshape(-1, h)

model_names = list(set(Y_hat_df.columns) - set([time_col, target_col, id_col]))
evaluation_np = np.empty((len(tags_), len(fn_names), len(model_names)), dtype=np.float64)
evaluation_index_np = np.empty((len(tags_) * len(fn_names), 2), dtype=object)
for i_level, (level, cats) in enumerate(tags_.items()):
mask = ufp.is_in(Y_hat_df[id_col], cats)
Y_h_cats = ufp.filter_with_mask(Y_hat_df, mask)

mask = ufp.is_in(Y_test_df[id_col], cats)
y_test_cats = ufp.filter_with_mask(Y_test_df, mask)[target_col]\
.to_numpy()\
.reshape(-1, h)

if has_y_insample and Y_df is not None:
y_insample = Y_df.pivot(columns='ds', values='y').loc[cats].values
y_insample = pivot(Y_df, index = id_col, columns = time_col, values = target_col)
mask = ufp.is_in(y_insample[id_col], cats)
y_insample = ufp.filter_with_mask(y_insample, mask)
y_insample = ufp.drop_columns(y_insample, id_col)
y_insample = y_insample.to_numpy()

for i_fn, fn in enumerate(self.evaluators):
if 'y_insample' in signature(fn).parameters:
kwargs = {'y_insample': y_insample}
else:
kwargs = {}
fn_name = fn_names[i_fn]
for model in model_names:
loss = fn(y_test_cats, Y_h_cats[model].values.reshape(-1, h), **kwargs)
for i_model, model in enumerate(model_names):
loss = fn(y_test_cats, Y_h_cats[model].to_numpy().reshape(-1, h), **kwargs)
if benchmark is not None:
scale = fn(y_test_cats, Y_h_cats[benchmark].values.reshape(-1, h), **kwargs)
scale = fn(y_test_cats, Y_h_cats[benchmark].to_numpy().reshape(-1, h), **kwargs)
if np.isclose(scale, 0., atol=np.finfo(float).eps):
scale += np.finfo(float).eps
if np.isclose(scale, loss, atol=1e-8):
scale = 1.
loss /= scale
evaluation.loc[(level, fn_name), model] = loss

evaluation_np[i_level, i_fn, i_model] = loss
evaluation_index_np[i_level * len(fn_names) + i_fn, 0] = level
evaluation_index_np[i_level * len(fn_names) + i_fn, 1] = fn_name

evaluation_np = evaluation_np.reshape(-1, len(model_names))
evaluation = df_constructor(dftype=type(Y_hat_df),
X=evaluation_index_np,
columns=["level", "metric"])
evaluation = ufp.assign_columns(evaluation, model_names, evaluation_np)

return evaluation
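To ground the refactor, here is a minimal usage sketch of the new `evaluate` signature with its `id_col`/`time_col`/`target_col` parameters. The toy hierarchy, the `mse` helper, and the `Naive` model column are illustrative assumptions, not part of this commit:

```python
import numpy as np
import pandas as pd
from hierarchicalforecast.evaluation import HierarchicalEvaluation

def mse(y, y_hat):  # illustrative evaluator: mean squared error
    return np.mean((y - y_hat) ** 2)

# Hypothetical two-level hierarchy: one total series aggregating two regions.
tags = {"Country": np.array(["total"]),
        "Country/Region": np.array(["total/r1", "total/r2"])}
ids, h = ["total", "total/r1", "total/r2"], 2
Y_test_df = pd.DataFrame({"unique_id": np.repeat(ids, h),
                          "ds": list(range(h)) * len(ids),
                          "y": np.arange(6, dtype=np.float64)})
Y_hat_df = Y_test_df.rename(columns={"y": "Naive"})  # one "model" column

evaluation = HierarchicalEvaluation(evaluators=[mse]).evaluate(
    Y_hat_df=Y_hat_df, Y_test_df=Y_test_df, tags=tags,
    id_col="unique_id", time_col="ds", target_col="y")
# evaluation has 'level' and 'metric' columns plus one column per model.
```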
107 changes: 85 additions & 22 deletions hierarchicalforecast/utils.py
@@ -1,28 +1,39 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/src/utils.ipynb.

# %% auto 0
__all__ = ['concat_str', 'group_by_agg_named', 'aggregate', 'HierarchicalPlot']
__all__ = ['concat_str', 'group_by_agg_named', 'df_constructor', 'pivot', 'aggregate', 'HierarchicalPlot']

# %% ../nbs/src/utils.ipynb 3
import sys
import timeit
from typing import Dict, List, Optional, Iterable, Union, Sequence
import warnings
from typing import Dict, List, Optional, Iterable, Union, Sequence, TypeVar

import matplotlib.pyplot as plt
import numpy as np
from numba import njit, prange
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from utilsforecast.compat import DFType
from utilsforecast.compat import DataFrame
import utilsforecast.processing as ufp

plt.rcParams['font.family'] = 'serif'

# %% ../nbs/src/utils.ipynb 5
# This code should be moved to utilsforecast
from utilsforecast.compat import DataFrame
import polars as pl
try:
import polars
import polars as pl
from polars import DataFrame as pl_DataFrame

DFType = TypeVar("DFType", pd.DataFrame, polars.DataFrame)
except ImportError:
class pl_DataFrame: ... # type: ignore

DFType = pd.DataFrame # type: ignore

# %% ../nbs/src/utils.ipynb 6
# This code should be moved to utilsforecast
def concat_str(
df: DataFrame,
cols: List[str],
Expand Down Expand Up @@ -54,14 +65,55 @@ def group_by_agg_named(df: DataFrame, by, aggs, maintain_order=False) -> DataFra
)
return out

# %% ../nbs/src/utils.ipynb 6
def df_constructor(dftype: DFType, X: Optional[np.ndarray] = None, columns: Optional[List[str]] = None, sparse: bool = False) -> DataFrame:
"""
Create a DataFrame of type DFType from a numpy array.
"""
if dftype is pd.DataFrame:
if sparse:
df_constructor = pd.DataFrame.sparse.from_spmatrix
else:
df_constructor = pd.DataFrame
df = df_constructor(X, columns=columns)
else:
if sparse:
warnings.warn("Sparse DataFrames are not supported in Polars.")

df = pl_DataFrame(X, schema=columns)

return df
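A quick sanity check of `df_constructor` (pandas shown; with polars installed, passing `pl.DataFrame` as `dftype` dispatches to the polars branch):

```python
import numpy as np
import pandas as pd
from hierarchicalforecast.utils import df_constructor

X = np.array([[1.0, 0.0], [0.0, 1.0]])
df = df_constructor(dftype=pd.DataFrame, X=X, columns=["a", "b"])
# df is a regular pandas DataFrame; sparse=True would route through
# pd.DataFrame.sparse.from_spmatrix, and is only honored for pandas.
```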

def pivot(df: DataFrame, index: str = "unique_id", columns: str = "ds", values: str = "y", sort: bool = True) -> DataFrame:
"""
Pivot a DataFrame.
"""
if isinstance(df, pd.DataFrame):
pivot_args = {'values': values,
'index': index,
'columns': columns,
'sort': sort,
'dropna': False}
df_pivot = df.pivot_table(**pivot_args)
df_pivot = df_pivot.reset_index()
else:
# Polars
pivot_args = {'values': values,
'index': index,
'on': columns,
'maintain_order': sort}
df_pivot = df.pivot(**pivot_args)
if sort:
df_pivot = df_pivot.sort(by=index)
return df_pivot
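And a small round-trip through `pivot` on a long-format frame (toy data; the polars path goes through `df.pivot` with `maintain_order` and an explicit sort):

```python
import pandas as pd
from hierarchicalforecast.utils import pivot

long_df = pd.DataFrame({"unique_id": ["a", "a", "b", "b"],
                        "ds": [1, 2, 1, 2],
                        "y": [1.0, 2.0, 3.0, 4.0]})
wide = pivot(long_df)  # one row per unique_id, one column per ds value
```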

# %% ../nbs/src/utils.ipynb 7
# Global variables
NUMBA_NOGIL = True
NUMBA_CACHE = True
NUMBA_PARALLEL = True
NUMBA_FASTMATH = True

# %% ../nbs/src/utils.ipynb 7
# %% ../nbs/src/utils.ipynb 8
class CodeTimer:
def __init__(self, name=None, verbose=True):
self.name = " '" + name + "'" if name else ''
@@ -76,7 +128,7 @@ def __exit__(self, exc_type, exc_value, traceback):
print('Code block' + self.name + \
' took:\t{0:.5f}'.format(self.took) + ' seconds')

# %% ../nbs/src/utils.ipynb 8
# %% ../nbs/src/utils.ipynb 9
def is_strictly_hierarchical(S: np.ndarray,
tags: Dict[str, np.ndarray]):
# main idea:
@@ -95,6 +147,23 @@ def is_strictly_hierarchical(S: np.ndarray,
return paths == nodes

# %% ../nbs/src/utils.ipynb 10
def cov2corr(cov, return_std=False):
""" convert covariance matrix to correlation matrix
**Parameters:**<br>
`cov`: array_like, 2d covariance matrix.<br>
`return_std`: bool=False, if True returned std.<br>
**Returns:**<br>
`corr`: ndarray (subclass) correlation matrix
"""
cov = np.asanyarray(cov)
std_ = np.sqrt(np.diag(cov))
corr = cov / np.outer(std_, std_)
if return_std:
return corr, std_
else:
return corr
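A worked example of `cov2corr` with hand-checkable numbers:

```python
import numpy as np
from hierarchicalforecast.utils import cov2corr

cov = np.array([[4.0, 2.0],
                [2.0, 9.0]])
corr, std = cov2corr(cov, return_std=True)
# std == [2., 3.]; the off-diagonal correlation is 2 / (2 * 3) ≈ 0.333
```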

# %% ../nbs/src/utils.ipynb 12
def _to_upper_hierarchy(bottom_split, bottom_values, upper_key):
upper_split = upper_key.split('/')
upper_idxs = [bottom_split.index(i) for i in upper_split]
@@ -105,7 +174,7 @@ def join_upper(bottom_value):

return [join_upper(val) for val in bottom_values]

# %% ../nbs/src/utils.ipynb 12
# %% ../nbs/src/utils.ipynb 15
def aggregate(
df: DFType,
spec: List[List[str]],
@@ -198,20 +267,14 @@ def aggregate(
except TypeError: # sklearn < 1.2
encoder = OneHotEncoder(categories=categories, sparse=sparse_s, dtype=np.float64)
S = encoder.fit_transform(S).T
if isinstance(df, pl.DataFrame):
S_df = pl.DataFrame(S, schema=list(bottom_levels))
else:
df_constructor = pd.DataFrame
if sparse_s:
df_constructor = pd.DataFrame.sparse.from_spmatrix
S_df = df_constructor(S, columns=bottom_levels)
S_df = df_constructor(type(df), S, columns=list(bottom_levels), sparse=sparse_s)

S_df = ufp.assign_columns(S_df, names="unique_id", values=np.hstack(categories))
S_df = S_df[["unique_id"] + list(bottom_levels)]

return Y_df, S_df, tags
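For context, a typical `aggregate` call that exercises the new `df_constructor` path (toy data; the column names are illustrative):

```python
import pandas as pd
from hierarchicalforecast.utils import aggregate

df = pd.DataFrame({"Country": ["A", "A", "A", "A"],
                   "Region": ["r1", "r1", "r2", "r2"],
                   "ds": [1, 2, 1, 2],
                   "y": [1.0, 2.0, 3.0, 4.0]})
spec = [["Country"], ["Country", "Region"]]
Y_df, S_df, tags = aggregate(df, spec)
# Y_df holds the aggregated series, S_df the summing matrix (with a
# 'unique_id' column), and tags maps each level to its series ids.
```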

# %% ../nbs/src/utils.ipynb 27
# %% ../nbs/src/utils.ipynb 30
class HierarchicalPlot:
""" Hierarchical Plot
@@ -431,7 +494,7 @@ def plot_hierarchical_predictions_gap(self,
plt.grid()
plt.show()

# %% ../nbs/src/utils.ipynb 42
# %% ../nbs/src/utils.ipynb 45
# convert levels to output quantile names
def level_to_outputs(level:Iterable[int]):
""" Converts list of levels into output names matching StatsForecast and NeuralForecast methods.
Expand Down Expand Up @@ -475,7 +538,7 @@ def quantiles_to_outputs(quantiles:Iterable[float]):
output_names.append('-median')
return quantiles, output_names
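Roughly, these helpers map between confidence levels and quantiles under the StatsForecast/NeuralForecast naming convention; a sketch (exact output ordering is not asserted here):

```python
from hierarchicalforecast.utils import level_to_outputs

quantiles, names = level_to_outputs([80, 90])
# An 80% interval corresponds to the 0.1 and 0.9 quantiles ('-lo-80'/'-hi-80'),
# a 90% interval to 0.05 and 0.95; quantiles_to_outputs goes the other way,
# mapping e.g. [0.5] to the '-median' suffix.
```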

# %% ../nbs/src/utils.ipynb 43
# %% ../nbs/src/utils.ipynb 46
# given input array of sample forecasts and input quantiles/levels,
# output a Pandas Dataframe with columns of quantile predictions
def samples_to_quantiles_df(samples: np.ndarray,
@@ -533,7 +596,7 @@ def samples_to_quantiles_df(samples: np.ndarray,

return _quantiles, pd.concat([data,df], axis=1).set_index('unique_id')
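A hedged usage sketch of `samples_to_quantiles_df`; the argument roles and the sample shape `(n_series, n_samples, horizon)` are assumptions read off the signature and the docstring, which is elided in this diff:

```python
import numpy as np
import pandas as pd
from hierarchicalforecast.utils import samples_to_quantiles_df

samples = np.random.rand(2, 100, 3)  # assumed (n_series, n_samples, horizon)
unique_ids = ["a", "b"]
dates = pd.date_range("2024-01-01", periods=3)
quantiles, q_df = samples_to_quantiles_df(samples, unique_ids, dates,
                                          level=[80, 90])
# q_df is indexed by 'unique_id' with one column per quantile forecast.
```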

# %% ../nbs/src/utils.ipynb 49
# %% ../nbs/src/utils.ipynb 52
# Masked empirical covariance matrix
@njit("Array(float64, 2, 'F')(Array(float64, 2, 'C'), Array(bool, 2, 'C'))", nogil=NUMBA_NOGIL, cache=NUMBA_CACHE, parallel=NUMBA_PARALLEL, fastmath=NUMBA_FASTMATH, error_model="numpy")
# @njit(nogil=NOGIL, cache=CACHE, parallel=True, fastmath=True, error_model="numpy")
@@ -565,7 +628,7 @@ def _ma_cov(residuals: np.ndarray, not_nan_mask: np.ndarray):

return W
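A sketch of how the masked-covariance helper might be invoked; this is a private numba kernel, and the residuals layout `(n_series, n_timesteps)` is an assumption:

```python
import numpy as np

residuals = np.random.rand(3, 50)       # assumed (n_series, n_timesteps), float64, C-order
not_nan_mask = np.ones_like(residuals, dtype=np.bool_)
not_nan_mask[0, :5] = False             # pretend some observations were missing
W = _ma_cov(residuals, not_nan_mask)    # (3, 3) masked empirical covariance
```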

# %% ../nbs/src/utils.ipynb 50
# %% ../nbs/src/utils.ipynb 53
# Shrunk covariance matrix using the Schafer-Strimmer method

@njit("Array(float64, 2, 'F')(Array(float64, 2, 'C'), float64)", nogil=NUMBA_NOGIL, cache=NUMBA_CACHE, parallel=NUMBA_PARALLEL, fastmath=NUMBA_FASTMATH, error_model="numpy")
@@ -689,7 +752,7 @@ def _shrunk_covariance_schaferstrimmer_with_nans(residuals: np.ndarray, not_nan_mask: np.ndarray):

return W

# %% ../nbs/src/utils.ipynb 52
# %% ../nbs/src/utils.ipynb 55
# Lasso cyclic coordinate descent
@njit("Array(float64, 1, 'C')(Array(float64, 2, 'C'), Array(float64, 1, 'C'), float64, int64, float64)", nogil=NUMBA_NOGIL, cache=NUMBA_CACHE, fastmath=NUMBA_FASTMATH, error_model="numpy")
def _lasso(X: np.ndarray, y: np.ndarray,
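Finally, a hedged sketch for the Lasso coordinate-descent kernel whose body is collapsed above; the roles of the scalar arguments (regularization strength, max iterations, tolerance) are inferred from the numba signature, not confirmed by this diff:

```python
import numpy as np

X = np.ascontiguousarray(np.random.rand(100, 5))
y = np.ascontiguousarray(X @ np.array([1.0, 0.0, 2.0, 0.0, 0.5]))
beta = _lasso(X, y, 0.1, 1000, 1e-6)  # assumed: (X, y, lambda, max_iters, tol)
```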
