From 8fb028b910144bb62903619147aa9ff2ea1457a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20M=C3=A4rz?= Date: Tue, 8 Aug 2023 16:32:22 +0200 Subject: [PATCH] Added tests --- tests/__init__.py | 1 + tests/test_distribution_utils/__init__.py | 1 + .../test_calculate_start_values.py | 24 ++ .../test_compute_gradients_and_hessians.py | 115 ++++++++ .../test_crps_score.py | 21 ++ .../test_dist_select.py | 101 +++++++ .../test_draw_samples.py | 18 ++ .../test_get_params_loss.py | 73 +++++ .../test_loss_fn_start_values.py | 29 ++ .../test_distribution_utils/test_metric_fn.py | 78 ++++++ .../test_objective_fn.py | 97 +++++++ .../test_predict_dist.py | 80 ++++++ .../test_stabilize_derivative.py | 35 +++ tests/test_distributions/__init__.py | 1 + tests/test_distributions/test_expectile.py | 101 +++++++ tests/test_distributions/test_spline_flow.py | 50 ++++ .../test_univariate_cont_distns.py | 35 +++ .../test_univariate_discrete_distns.py | 40 +++ tests/test_flow_utils/__init__.py | 1 + .../test_create_spline_flow.py | 11 + tests/test_flow_utils/test_crps_score.py | 21 ++ .../test_replace_parameters.py | 24 ++ tests/test_model/__init__.py | 1 + tests/test_model/test_model.py | 191 +++++++++++++ tests/test_utils/__init__.py | 1 + tests/test_utils/test_utils.py | 31 +++ tests/utils.py | 258 ++++++++++++++++++ 27 files changed, 1439 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_distribution_utils/__init__.py create mode 100644 tests/test_distribution_utils/test_calculate_start_values.py create mode 100644 tests/test_distribution_utils/test_compute_gradients_and_hessians.py create mode 100644 tests/test_distribution_utils/test_crps_score.py create mode 100644 tests/test_distribution_utils/test_dist_select.py create mode 100644 tests/test_distribution_utils/test_draw_samples.py create mode 100644 tests/test_distribution_utils/test_get_params_loss.py create mode 100644 tests/test_distribution_utils/test_loss_fn_start_values.py create mode 100644 tests/test_distribution_utils/test_metric_fn.py create mode 100644 tests/test_distribution_utils/test_objective_fn.py create mode 100644 tests/test_distribution_utils/test_predict_dist.py create mode 100644 tests/test_distribution_utils/test_stabilize_derivative.py create mode 100644 tests/test_distributions/__init__.py create mode 100644 tests/test_distributions/test_expectile.py create mode 100644 tests/test_distributions/test_spline_flow.py create mode 100644 tests/test_distributions/test_univariate_cont_distns.py create mode 100644 tests/test_distributions/test_univariate_discrete_distns.py create mode 100644 tests/test_flow_utils/__init__.py create mode 100644 tests/test_flow_utils/test_create_spline_flow.py create mode 100644 tests/test_flow_utils/test_crps_score.py create mode 100644 tests/test_flow_utils/test_replace_parameters.py create mode 100644 tests/test_model/__init__.py create mode 100644 tests/test_model/test_model.py create mode 100644 tests/test_utils/__init__.py create mode 100644 tests/test_utils/test_utils.py create mode 100644 tests/utils.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..89fa3cb --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""LightGBMLSS - An extension of LightGBM to probabilistic forecasting""" \ No newline at end of file diff --git a/tests/test_distribution_utils/__init__.py b/tests/test_distribution_utils/__init__.py new file mode 100644 index 0000000..89fa3cb --- /dev/null +++ b/tests/test_distribution_utils/__init__.py @@ -0,0 +1 @@ 
+"""LightGBMLSS - An extension of LightGBM to probabilistic forecasting""" \ No newline at end of file diff --git a/tests/test_distribution_utils/test_calculate_start_values.py b/tests/test_distribution_utils/test_calculate_start_values.py new file mode 100644 index 0000000..8e8e375 --- /dev/null +++ b/tests/test_distribution_utils/test_calculate_start_values.py @@ -0,0 +1,24 @@ +from ..utils import BaseTestClass, gen_test_data +import numpy as np + + +class TestClass(BaseTestClass): + def test_calculate_start_values(self, dist_class, loss_fn): + # Create data for testing + _, target, _ = gen_test_data(dist_class) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Call the objective_fn method + loss, start_values = dist_class.dist.calculate_start_values(target) + + # Assertions + assert isinstance(loss, np.ndarray) + assert not np.isnan(loss).any() + assert not np.isinf(loss).any() + + assert isinstance(start_values, np.ndarray) + assert start_values.shape[0] == dist_class.dist.n_dist_param + assert not np.isnan(start_values).any() + assert not np.isinf(start_values).any() diff --git a/tests/test_distribution_utils/test_compute_gradients_and_hessians.py b/tests/test_distribution_utils/test_compute_gradients_and_hessians.py new file mode 100644 index 0000000..c8fafb0 --- /dev/null +++ b/tests/test_distribution_utils/test_compute_gradients_and_hessians.py @@ -0,0 +1,115 @@ +from ..utils import BaseTestClass, gen_test_data +from typing import List +import numpy as np +import torch + + +class TestClass(BaseTestClass): + def test_compute_gradients_and_hessians(self, dist_class, loss_fn, stabilization): + # Create data for testing + params, target, weights, _ = gen_test_data(dist_class, weights=True) + if dist_class.dist.univariate: + target = torch.tensor(target) + else: + target = torch.tensor(target)[:, :dist_class.dist.n_targets] + start_values = np.array([0.5 for _ in range(dist_class.dist.n_dist_param)]) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Set the stabilization for testing + dist_class.dist.stabilization = stabilization + + # Call the function + predt, loss = dist_class.dist.get_params_loss(params, target, start_values, requires_grad=True) + grad, hess = dist_class.dist.compute_gradients_and_hessians(loss, predt, weights) + + # Assertions + assert isinstance(predt, List) + for i in range(len(predt)): + assert isinstance(predt[i], torch.Tensor) + assert not torch.isnan(predt[i]).any() + assert not torch.isinf(predt[i]).any() + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + + assert isinstance(grad, np.ndarray) + assert isinstance(hess, np.ndarray) + assert grad.shape == params.flatten().shape + assert hess.shape == params.flatten().shape + assert not np.isnan(grad).any() + assert not np.isnan(hess).any() + + def test_compute_gradients_and_hessians_crps(self, dist_class_crps, stabilization): + # Create data for testing + params, target, weights, _ = gen_test_data(dist_class_crps, weights=True) + if dist_class_crps.dist.univariate: + target = torch.tensor(target) + else: + target = torch.tensor(target)[:, :dist_class_crps.dist.n_targets] + start_values = np.array([0.5 for _ in range(dist_class_crps.dist.n_dist_param)]) + + # Set the loss function for testing + dist_class_crps.dist.loss_fn = "crps" + + # Set the stabilization for testing + dist_class_crps.dist.stabilization = stabilization + + # Call the function + predt, loss = 
dist_class_crps.dist.get_params_loss(params, target, start_values, requires_grad=True) + grad, hess = dist_class_crps.dist.compute_gradients_and_hessians(loss, predt, weights) + + # Assertions + assert isinstance(predt, List) + for i in range(len(predt)): + assert isinstance(predt[i], torch.Tensor) + assert not torch.isnan(predt[i]).any() + assert not torch.isinf(predt[i]).any() + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + + assert isinstance(grad, np.ndarray) + assert isinstance(hess, np.ndarray) + assert grad.shape == params.flatten().shape + assert hess.shape == params.flatten().shape + assert not np.isnan(grad).any() + assert not np.isnan(hess).any() + + def test_compute_gradients_and_hessians_nans(self, dist_class, loss_fn, stabilization): + # Create data for testing + params, target, weights, _ = gen_test_data(dist_class, weights=True) + params[0, 0] = np.nan + if dist_class.dist.univariate: + target = torch.tensor(target) + else: + target = torch.tensor(target)[:, :dist_class.dist.n_targets] + start_values = np.array([0.5 for _ in range(dist_class.dist.n_dist_param)]) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Set the stabilization for testing + dist_class.dist.stabilization = stabilization + + # Call the function + predt, loss = dist_class.dist.get_params_loss(params, target, start_values, requires_grad=True) + grad, hess = dist_class.dist.compute_gradients_and_hessians(loss, predt, weights) + + # Assertions + assert isinstance(predt, List) + for i in range(len(predt)): + assert isinstance(predt[i], torch.Tensor) + assert not torch.isnan(predt[i]).any() + assert not torch.isinf(predt[i]).any() + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + + assert isinstance(grad, np.ndarray) + assert isinstance(hess, np.ndarray) + assert grad.shape == params.flatten().shape + assert hess.shape == params.flatten().shape + assert not np.isnan(grad).any() + assert not np.isnan(hess).any() diff --git a/tests/test_distribution_utils/test_crps_score.py b/tests/test_distribution_utils/test_crps_score.py new file mode 100644 index 0000000..97f6411 --- /dev/null +++ b/tests/test_distribution_utils/test_crps_score.py @@ -0,0 +1,21 @@ +from ..utils import BaseTestClass +import torch + + +class TestClass(BaseTestClass): + def test_crps_score(self, dist_class_crps): + # Create data for testing + torch.manual_seed(123) + n_obs = 10 + n_samples = 20 + y = torch.rand(n_obs, 1) + yhat_dist = torch.rand(n_samples, n_obs) + + # Call the function + loss = dist_class_crps.dist.crps_score(y, yhat_dist) + + # Assertions + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + assert loss.shape == y.shape diff --git a/tests/test_distribution_utils/test_dist_select.py b/tests/test_distribution_utils/test_dist_select.py new file mode 100644 index 0000000..c11bfa4 --- /dev/null +++ b/tests/test_distribution_utils/test_dist_select.py @@ -0,0 +1,101 @@ +from ..utils import BaseTestClass + +from lightgbmlss.distributions import Beta, Gaussian, StudentT, Gamma, Cauchy, LogNormal, Weibull, Gumbel, Laplace +from lightgbmlss.distributions.SplineFlow import * +from lightgbmlss.distributions.distribution_utils import DistributionClass as univariate_dist_class +from lightgbmlss.distributions.flow_utils import NormalizingFlowClass as flow_dist_class + +import numpy as np +import pandas as pd + + 
+class TestClass(BaseTestClass): + #################################################################################################################### + # Univariate Distribution + #################################################################################################################### + def test_univar_dist_select(self): + # Create data for testing + target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1) + candidate_distributions = [Beta, Gaussian, StudentT, Gamma, Cauchy, LogNormal, Weibull, Gumbel, Laplace] + + # Call the function + dist_df = univariate_dist_class().dist_select( + target, candidate_distributions, n_samples=10, plot=False + ).reset_index(drop=True) + + # Assertions + assert isinstance(dist_df, pd.DataFrame) + assert not dist_df.isna().any().any() + assert isinstance(dist_df["distribution"].values[0], str) + assert np.issubdtype(dist_df["nll"].dtype, np.float64) + assert not np.isnan(dist_df["nll"].values).any() + assert not np.isinf(dist_df["nll"].values).any() + + def test_univar_dist_select_plot(self): + # Create data for testing + target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1) + candidate_distributions = [Beta, Gaussian, StudentT, Gamma, Cauchy, LogNormal, Weibull, Gumbel, Laplace] + + # Call the function + dist_df = univariate_dist_class().dist_select( + target, candidate_distributions, n_samples=10, plot=True + ).reset_index(drop=True) + + # Assertions + assert isinstance(dist_df, pd.DataFrame) + assert not dist_df.isna().any().any() + assert isinstance(dist_df["distribution"].values[0], str) + assert np.issubdtype(dist_df["nll"].dtype, np.float64) + assert not np.isnan(dist_df["nll"].values).any() + assert not np.isinf(dist_df["nll"].values).any() + + #################################################################################################################### + # Normalizing Flows + #################################################################################################################### + def test_flow_select(self): + # Create data for testing + target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1) + bound = np.max([np.abs(target.min()), target.max()]) + target_support = "real" + + candidate_flows = [ + SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="linear"), + SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="quadratic") + ] + + # Call the function + dist_df = flow_dist_class().flow_select( + target, candidate_flows, n_samples=10, plot=False + ).reset_index(drop=True) + + # Assertions + assert isinstance(dist_df, pd.DataFrame) + assert not dist_df.isna().any().any() + assert isinstance(dist_df["NormFlow"].values[0], str) + assert np.issubdtype(dist_df["nll"].dtype, np.float64) + assert not np.isnan(dist_df["nll"].values).any() + assert not np.isinf(dist_df["nll"].values).any() + + def test_flow_select_plot(self): + # Create data for testing + target = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1) + bound = np.max([np.abs(target.min()), target.max()]) + target_support = "real" + + candidate_flows = [ + SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="linear"), + SplineFlow(target_support=target_support, count_bins=2, bound=bound, order="quadratic") + ] + + # Call the function + dist_df = flow_dist_class().flow_select( + target, candidate_flows, n_samples=10, plot=True + ).reset_index(drop=True) + + # Assertions + assert isinstance(dist_df, pd.DataFrame) + assert not dist_df.isna().any().any() + assert isinstance(dist_df["NormFlow"].values[0], 
str) + assert np.issubdtype(dist_df["nll"].dtype, np.float64) + assert not np.isnan(dist_df["nll"].values).any() + assert not np.isinf(dist_df["nll"].values).any() diff --git a/tests/test_distribution_utils/test_draw_samples.py b/tests/test_distribution_utils/test_draw_samples.py new file mode 100644 index 0000000..4ee9487 --- /dev/null +++ b/tests/test_distribution_utils/test_draw_samples.py @@ -0,0 +1,18 @@ +from ..utils import BaseTestClass +import pandas as pd +import numpy as np + + +class TestClass(BaseTestClass): + def test_draw_samples(self, dist_class): + # Create data for testing + predt_params = pd.DataFrame(np.array([0.5 for _ in range(dist_class.dist.n_dist_param)], dtype="float32")).T + + # Call the function + dist_samples = dist_class.dist.draw_samples(predt_params) + + # Assertions + if str(dist_class.dist).split(".")[2] != "Expectile": + assert isinstance(dist_samples, (pd.DataFrame, type(None))) + assert not dist_samples.isna().any().any() + assert not np.isinf(dist_samples).any().any() diff --git a/tests/test_distribution_utils/test_get_params_loss.py b/tests/test_distribution_utils/test_get_params_loss.py new file mode 100644 index 0000000..3f7442d --- /dev/null +++ b/tests/test_distribution_utils/test_get_params_loss.py @@ -0,0 +1,73 @@ +from ..utils import BaseTestClass, gen_test_data +from typing import List +import numpy as np +import torch + + +class TestClass(BaseTestClass): + def test_get_params_loss(self, dist_class, loss_fn, requires_grad): + # Create data for testing + predt, target, _ = gen_test_data(dist_class) + target = torch.tensor(target) + start_values = np.array([0.5 for _ in range(dist_class.dist.n_dist_param)]) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Call the function + predt, loss = dist_class.dist.get_params_loss(predt, target, start_values, requires_grad) + + # Assertions + assert isinstance(predt, List) + for i in range(len(predt)): + assert isinstance(predt[i], torch.Tensor) + assert not torch.isnan(predt[i]).any() + assert not torch.isinf(predt[i]).any() + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + + def test_get_params_loss_nans(self, dist_class, loss_fn, requires_grad): + # Create data for testing + predt, target, _ = gen_test_data(dist_class) + predt[0, 0] = np.nan + target = torch.tensor(target) + start_values = np.array([0.5 for _ in range(dist_class.dist.n_dist_param)]) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Call the function + predt, loss = dist_class.dist.get_params_loss(predt, target, start_values, requires_grad) + + # Assertions + assert isinstance(predt, List) + for i in range(len(predt)): + assert isinstance(predt[i], torch.Tensor) + assert not torch.isnan(predt[i]).any() + assert not torch.isinf(predt[i]).any() + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + + def test_get_params_loss_crps(self, dist_class_crps, requires_grad): + # Create data for testing + predt, target, _ = gen_test_data(dist_class_crps) + target = torch.tensor(target) + start_values = np.array([0.5 for _ in range(dist_class_crps.dist.n_dist_param)]) + + # Set the loss function for testing + dist_class_crps.dist.loss_fn = "crps" + + # Call the function + predt, loss = dist_class_crps.dist.get_params_loss(predt, target, start_values, requires_grad) + + # Assertions + assert isinstance(predt, List) + for i in range(len(predt)): + assert 
isinstance(predt[i], torch.Tensor) + assert not torch.isnan(predt[i]).any() + assert not torch.isinf(predt[i]).any() + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() diff --git a/tests/test_distribution_utils/test_loss_fn_start_values.py b/tests/test_distribution_utils/test_loss_fn_start_values.py new file mode 100644 index 0000000..1591e4a --- /dev/null +++ b/tests/test_distribution_utils/test_loss_fn_start_values.py @@ -0,0 +1,29 @@ +from ..utils import BaseTestClass, gen_test_data +import torch + + +class TestClass(BaseTestClass): + def test_loss_fn_start_values(self, dist_class, loss_fn): + # Create data for testing + _, target, _ = gen_test_data(dist_class) + predt = [ + torch.tensor(0.5, dtype=torch.float64).reshape(-1, 1).requires_grad_(True) for _ in + range(dist_class.dist.n_dist_param) + ] + if dist_class.dist.univariate: + target = torch.tensor(target) + else: + target = torch.tensor(target)[:, :dist_class.dist.n_targets] + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Call the function; classes that define a base_dist (normalizing flows) are skipped + if not hasattr(dist_class.dist, "base_dist"): + loss = dist_class.dist.loss_fn_start_values(predt, target) + # Assertions + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() diff --git a/tests/test_distribution_utils/test_metric_fn.py b/tests/test_distribution_utils/test_metric_fn.py new file mode 100644 index 0000000..015c39f --- /dev/null +++ b/tests/test_distribution_utils/test_metric_fn.py @@ -0,0 +1,78 @@ +from ..utils import BaseTestClass, gen_test_data +import numpy as np +import torch + + +class TestClass(BaseTestClass): + def test_metric_fn_weight(self, dist_class, loss_fn): + # Create data for testing + predt, labels, weights, dmatrix = gen_test_data(dist_class, weights=True) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Call the function + loss_fn, loss, is_higher_better = dist_class.dist.metric_fn(predt, dmatrix) + + # Assertions + assert isinstance(loss_fn, str) + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + assert isinstance(is_higher_better, bool) + assert not is_higher_better + + def test_metric_fn_no_weight(self, dist_class, loss_fn): + # Create data for testing + predt, labels, dmatrix = gen_test_data(dist_class, weights=False) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Call the function + loss_fn, loss, is_higher_better = dist_class.dist.metric_fn(predt, dmatrix) + + # Assertions + assert isinstance(loss_fn, str) + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + assert isinstance(is_higher_better, bool) + assert not is_higher_better + + def test_metric_fn_nans(self, dist_class, loss_fn): + # Create data for testing and set some predt to nan + predt, labels, weights, dmatrix = gen_test_data(dist_class, weights=True) + predt[0, 0] = np.nan + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Call the function + loss_fn, loss, is_higher_better = dist_class.dist.metric_fn(predt, dmatrix) + + # Assertions + assert isinstance(loss_fn, str) + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + assert isinstance(is_higher_better, bool) + assert not is_higher_better + + def test_metric_fn_crps(self, dist_class_crps): + #
Create data for testing + predt, labels, weights, dmatrix = gen_test_data(dist_class_crps, weights=True) + + # Set the loss function for testing + dist_class_crps.dist.loss_fn = "crps" + + # Call the function + loss_fn, loss, is_higher_better = dist_class_crps.dist.metric_fn(predt, dmatrix) + + # Assertions + assert isinstance(loss_fn, str) + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + assert isinstance(is_higher_better, bool) + assert not is_higher_better diff --git a/tests/test_distribution_utils/test_objective_fn.py b/tests/test_distribution_utils/test_objective_fn.py new file mode 100644 index 0000000..2e8561b --- /dev/null +++ b/tests/test_distribution_utils/test_objective_fn.py @@ -0,0 +1,97 @@ +from ..utils import BaseTestClass, gen_test_data +import numpy as np + + +class TestClass(BaseTestClass): + def test_objective_fn_weights(self, dist_class, loss_fn, stabilization): + # Create data for testing + predt, labels, weights, dmatrix = gen_test_data(dist_class, weights=True) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Set the stabilization for testing + dist_class.dist.stabilization = stabilization + + # Call the function + grad, hess = dist_class.dist.objective_fn(predt, dmatrix) + + # Assertions + assert isinstance(grad, np.ndarray) + assert isinstance(hess, np.ndarray) + assert grad.shape == predt.flatten().shape + assert hess.shape == predt.flatten().shape + assert not np.isnan(grad).any() + assert not np.isnan(hess).any() + assert not np.isinf(grad).any() + assert not np.isinf(hess).any() + + def test_objective_fn_no_weights(self, dist_class, loss_fn, stabilization): + # Create data for testing + predt, labels, dmatrix = gen_test_data(dist_class, weights=False) + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Set the stabilization for testing + dist_class.dist.stabilization = stabilization + + # Call the function + grad, hess = dist_class.dist.objective_fn(predt, dmatrix) + + # Assertions + assert isinstance(grad, np.ndarray) + assert isinstance(hess, np.ndarray) + assert grad.shape == predt.flatten().shape + assert hess.shape == predt.flatten().shape + assert not np.isnan(grad).any() + assert not np.isnan(hess).any() + assert not np.isinf(grad).any() + assert not np.isinf(hess).any() + + def test_objective_fn_nans(self, dist_class, loss_fn, stabilization): + # Create data for testing and set some predt to nan + predt, labels, weights, dmatrix = gen_test_data(dist_class, weights=True) + predt[0, 0] = np.nan + + # Set the loss function for testing + dist_class.dist.loss_fn = loss_fn + + # Set the stabilization for testing + dist_class.dist.stabilization = stabilization + + # Call the function + grad, hess = dist_class.dist.objective_fn(predt, dmatrix) + + # Assertions + assert isinstance(grad, np.ndarray) + assert isinstance(hess, np.ndarray) + assert grad.shape == predt.flatten().shape + assert hess.shape == predt.flatten().shape + assert not np.isnan(grad).any() + assert not np.isnan(hess).any() + assert not np.isinf(grad).any() + assert not np.isinf(hess).any() + + def test_objective_fn_crps(self, dist_class_crps, stabilization): + # Create data for testing + predt, labels, weights, dmatrix = gen_test_data(dist_class_crps, weights=True) + + # Set the loss function for testing + dist_class_crps.dist.loss_fn = "crps" + + # Set the stabilization for testing + dist_class_crps.dist.stabilization = stabilization + + # Call the function + grad,
hess = dist_class_crps.dist.objective_fn(predt, dmatrix) + + # Assertions + assert isinstance(grad, np.ndarray) + assert isinstance(hess, np.ndarray) + assert grad.shape == predt.flatten().shape + assert hess.shape == predt.flatten().shape + assert not np.isnan(grad).any() + assert not np.isnan(hess).any() + assert not np.isinf(grad).any() + assert not np.isinf(hess).any() diff --git a/tests/test_distribution_utils/test_predict_dist.py b/tests/test_distribution_utils/test_predict_dist.py new file mode 100644 index 0000000..299ef4c --- /dev/null +++ b/tests/test_distribution_utils/test_predict_dist.py @@ -0,0 +1,80 @@ +from ..utils import BaseTestClass +import numpy as np +import pandas as pd +import lightgbm as lgb + + +class TestClass(BaseTestClass): + #################################################################################################################### + # Univariate Distribution + #################################################################################################################### + def test_predict_dist_univariate(self, dist_class, pred_type): + if dist_class.dist.univariate and not hasattr(dist_class.dist, "base_dist"): + # Create data for testing + np.random.seed(123) + X_dta = pd.DataFrame(np.random.rand(100).reshape(-1, 1)) + y_dta = np.random.rand(100) + dtrain = lgb.Dataset(X_dta, label=y_dta) + + # Train the model + params = {"eta": 0.01} + dist_class.train(params, dtrain, num_boost_round=2) + + # Call the function + if dist_class.dist.tau is not None and pred_type in ["quantiles", "samples"]: + pred_type = "parameters" + predt_df = dist_class.dist.predict_dist(dist_class.booster, + X_dta, + dist_class.start_values, + pred_type, + n_samples=100, + quantiles=[0.1, 0.5, 0.9] + ) + + # Assertions + assert isinstance(predt_df, pd.DataFrame) + assert not predt_df.isna().any().any() + assert not np.isinf(predt_df).any().any() + if pred_type == "parameters" or pred_type == "expectiles": + assert predt_df.shape[1] == dist_class.dist.n_dist_param + if dist_class.dist.tau is None: + if pred_type == "samples": + assert predt_df.shape[1] == 100 + elif pred_type == "quantiles": + assert predt_df.shape[1] == 3 + + #################################################################################################################### + # Normalizing Flow + #################################################################################################################### + def test_predict_dist_flow(self, flow_class, pred_type): + # Create data for testing + np.random.seed(123) + X_dta = pd.DataFrame(np.random.rand(100).reshape(-1, 1)) + y_dta = np.random.rand(100) + dtrain = lgb.Dataset(X_dta, label=y_dta) + + # Train the model + params = {"eta": 0.01} + flow_class.train(params, dtrain, num_boost_round=2) + + # Call the function + if pred_type in ["expectiles"]: + pred_type = "parameters" + predt_df = flow_class.dist.predict_dist(flow_class.booster, + X_dta, + flow_class.start_values, + pred_type, + n_samples=100, + quantiles=[0.1, 0.5, 0.9] + ) + + # Assertions + assert isinstance(predt_df, pd.DataFrame) + assert not predt_df.isna().any().any() + assert not np.isinf(predt_df).any().any() + if pred_type == "parameters" or pred_type == "expectiles": + assert predt_df.shape[1] == flow_class.dist.n_dist_param + if pred_type == "samples": + assert predt_df.shape[1] == 100 + elif pred_type == "quantiles": + assert predt_df.shape[1] == 3 diff --git a/tests/test_distribution_utils/test_stabilize_derivative.py b/tests/test_distribution_utils/test_stabilize_derivative.py new 
file mode 100644 index 0000000..5a240e5 --- /dev/null +++ b/tests/test_distribution_utils/test_stabilize_derivative.py @@ -0,0 +1,35 @@ +from ..utils import BaseTestClass +import torch + + +class TestClass(BaseTestClass): + def test_stabilize_derivative(self, dist_class, stabilization): + # Create data for testing + torch.manual_seed(123) + input_der = torch.rand((10, 1), dtype=torch.float64) + + # Call the function + stab_der = dist_class.dist.stabilize_derivative(input_der, stabilization) + + # Assertions + assert isinstance(stab_der, torch.Tensor) + assert stab_der.shape == input_der.shape + assert not torch.isnan(stab_der).any() + assert not torch.isinf(stab_der).any() + if stabilization == "None": + assert torch.equal(input_der, stab_der) + + def test_stabilize_derivative_nans(self, dist_class, stabilization): + # Create data for testing + torch.manual_seed(123) + input_der = torch.rand((10, 1), dtype=torch.float64) + input_der[0] = torch.tensor([float("nan")]) + + # Call the function + stab_der = dist_class.dist.stabilize_derivative(input_der, stabilization) + + # Assertions + assert isinstance(stab_der, torch.Tensor) + assert stab_der.shape == input_der.shape + assert not torch.isnan(stab_der).any() + assert not torch.isinf(stab_der).any() diff --git a/tests/test_distributions/__init__.py b/tests/test_distributions/__init__.py new file mode 100644 index 0000000..89fa3cb --- /dev/null +++ b/tests/test_distributions/__init__.py @@ -0,0 +1 @@ +"""LightGBMLSS - An extension of LightGBM to probabilistic forecasting""" \ No newline at end of file diff --git a/tests/test_distributions/test_expectile.py b/tests/test_distributions/test_expectile.py new file mode 100644 index 0000000..42f420e --- /dev/null +++ b/tests/test_distributions/test_expectile.py @@ -0,0 +1,101 @@ +from ..utils import BaseTestClass +from lightgbmlss.distributions.Expectile import * +import torch +import pytest + + +class TestClass(BaseTestClass): + def test_init(self, expectile_dist): + assert isinstance(expectile_dist().stabilization, str) + assert expectile_dist().stabilization is not None + with pytest.raises(ValueError, match="Invalid stabilization method."): + expectile_dist(stabilization="invalid_stabilization") + + with pytest.raises(ValueError, match="Expectiles must be a list."): + expectile_dist(expectiles=0.1) + + with pytest.raises(ValueError, match="Expectiles must be between 0 and 1."): + expectile_dist(expectiles=[-0.1, 0.1, 1.1]) + + with pytest.raises(ValueError, match="penalize_crossing must be a boolean."): + expectile_dist(penalize_crossing=0.1) + + assert isinstance(expectile_dist().loss_fn, str) + assert expectile_dist().loss_fn is not None + + def test_expectile_distribution_parameters(self, expectile_dist): + assert isinstance(expectile_dist().param_dict, dict) + assert set(expectile_dist().param_dict.keys()) == set(expectile_dist().distribution_arg_names) + assert all(callable(func) for func in expectile_dist().param_dict.values()) + assert expectile_dist().n_dist_param == len(expectile_dist().distribution_arg_names) + assert isinstance(expectile_dist().n_dist_param, int) + assert isinstance(expectile_dist().distribution_arg_names, list) + assert isinstance(expectile_dist().tau, torch.Tensor) + + def test_defaults(self, expectile_dist): + assert isinstance(expectile_dist().univariate, bool) + assert expectile_dist().univariate is True + assert isinstance(expectile_dist().discrete, bool) + assert expectile_dist().discrete is False + assert expectile_dist().tau is not None + assert 
isinstance(expectile_dist().penalize_crossing, bool) + + def test_expectile_init(self): + # Create an instance of Expectile_Torch with example expectiles + expectiles = [torch.tensor([0.1, 0.5, 0.9])] + expectile_instance = Expectile_Torch(expectiles) + + # Assertions + assert expectile_instance.expectiles == expectiles + assert isinstance(expectile_instance.penalize_crossing, bool) + assert expectile_instance.__class__.__name__ == "Expectile" + + def test_expectile_log_prob(self): + # Create an instance of Expectile_Torch with example expectiles + expectiles = [torch.tensor([0.1, 0.5, 0.9])] + expectile_instance_penalize = Expectile_Torch(expectiles, penalize_crossing=True) + expectile_instance_no_penalize = Expectile_Torch(expectiles, penalize_crossing=False) + value = torch.tensor([0.2, 0.4, 0.6, 0.8]) + + # Call the function + loss_penalize = expectile_instance_penalize.log_prob(value, expectiles) + loss_no_penalize = expectile_instance_no_penalize.log_prob(value, expectiles) + + # Assertions + assert isinstance(loss_penalize, torch.Tensor) + assert not torch.isnan(loss_penalize).any() + assert not torch.isinf(loss_penalize).any() + + assert isinstance(loss_no_penalize, torch.Tensor) + assert not torch.isnan(loss_no_penalize).any() + assert not torch.isinf(loss_no_penalize).any() + + +def test_expectile_pnorm(): + # Create example data + tau = np.array([0.5], dtype="float") + m = np.array([0.2, 0.4, 0.8]).reshape(-1, 1) + sd = np.array([0.1, 0.2, 0.3]).reshape(-1, 1) + + # Call the function + out = expectile_pnorm(tau, m, sd) + + # Assertions + assert isinstance(out, np.ndarray) + assert not np.isnan(out).any() + assert not np.isinf(out).any() + + +def test_expectile_norm(): + # Create example data + tau = np.array([0.5], dtype="float") + m = np.array([0.2, 0.4, 0.8]).reshape(-1, 1) + sd = np.array([0.1, 0.2, 0.3]).reshape(-1, 1) + + # Call the function + out = expectile_norm(tau, m, sd) + + # Assertions + assert isinstance(out, np.ndarray) + assert not np.isnan(out).any() + assert not np.isinf(out).any() diff --git a/tests/test_distributions/test_spline_flow.py b/tests/test_distributions/test_spline_flow.py new file mode 100644 index 0000000..26727e3 --- /dev/null +++ b/tests/test_distributions/test_spline_flow.py @@ -0,0 +1,50 @@ +from ..utils import BaseTestClass +import pytest + + +class TestClass(BaseTestClass): + def test_init(self, flow_dist): + with pytest.raises(ValueError, match="target_support must be a string."): + flow_dist(target_support=1) + with pytest.raises(ValueError, match="Invalid target_support."): + flow_dist(target_support="invalid_target_support") + + with pytest.raises(ValueError, match="count_bins must be an integer."): + flow_dist(count_bins=1.0) + with pytest.raises(ValueError, match="count_bins must be an integer."): + flow_dist(count_bins="1.0") + with pytest.raises(ValueError, match="count_bins must be a positive integer > 0"): + flow_dist(count_bins=0) + + with pytest.raises(ValueError, match="bound must be a float."): + flow_dist(bound=1) + with pytest.raises(ValueError, match="bound must be a float."): + flow_dist(bound="1") + + with pytest.raises(ValueError, match="order must be a string."): + flow_dist(order=1) + + with pytest.raises(ValueError, match="Invalid order specification."): + flow_dist(order="invalid_order") + + assert isinstance(flow_dist().stabilization, str) + assert flow_dist().stabilization is not None + with pytest.raises(ValueError, match="Invalid stabilization method."): + flow_dist(stabilization="invalid_stabilization") + with pytest.raises(ValueError, match="stabilization must be a string."): + flow_dist(stabilization=1) + + assert
isinstance(flow_dist().loss_fn, str) + assert flow_dist().loss_fn is not None + with pytest.raises(ValueError, match="loss_fn must be a string."): + flow_dist(loss_fn=1) + with pytest.raises(ValueError, match="Invalid loss_fn."): + flow_dist(loss_fn="invalid_loss_fn") + + def test_distribution_parameters(self, flow_dist): + assert isinstance(flow_dist().param_dict, dict) + assert all(callable(func) for func in flow_dist().param_dict.values()) + + def test_defaults(self, flow_dist): + assert isinstance(flow_dist().univariate, bool) + assert flow_dist().univariate is True + assert isinstance(flow_dist().discrete, bool) diff --git a/tests/test_distributions/test_univariate_cont_distns.py b/tests/test_distributions/test_univariate_cont_distns.py new file mode 100644 index 0000000..02f527b --- /dev/null +++ b/tests/test_distributions/test_univariate_cont_distns.py @@ -0,0 +1,35 @@ +from ..utils import BaseTestClass +import pytest + + +class TestClass(BaseTestClass): + def test_init(self, univariate_cont_dist): + assert isinstance(univariate_cont_dist().stabilization, str) + assert univariate_cont_dist().stabilization is not None + with pytest.raises(ValueError, match="Invalid stabilization method."): + univariate_cont_dist(stabilization="invalid_stabilization") + + with pytest.raises(ValueError, match="Invalid response function."): + univariate_cont_dist(response_fn="invalid_response_fn") + + assert isinstance(univariate_cont_dist().loss_fn, str) + assert univariate_cont_dist().loss_fn is not None + with pytest.raises(ValueError, match="Invalid loss function."): + univariate_cont_dist(loss_fn="invalid_loss_fn") + + def test_distribution_parameters(self, univariate_cont_dist): + assert isinstance(univariate_cont_dist().param_dict, dict) + assert set(univariate_cont_dist().param_dict.keys()) == set(univariate_cont_dist().distribution_arg_names) + assert all(callable(func) for func in univariate_cont_dist().param_dict.values()) + assert univariate_cont_dist().n_dist_param == len(univariate_cont_dist().distribution_arg_names) + assert isinstance(univariate_cont_dist().n_dist_param, int) + assert isinstance(univariate_cont_dist().distribution_arg_names, list) + assert univariate_cont_dist().distribution_arg_names == list(univariate_cont_dist().distribution.arg_constraints.keys()) + + def test_defaults(self, univariate_cont_dist): + assert isinstance(univariate_cont_dist().univariate, bool) + assert univariate_cont_dist().univariate is True + assert isinstance(univariate_cont_dist().discrete, bool) + assert univariate_cont_dist().discrete is False + assert univariate_cont_dist().tau is None + assert isinstance(univariate_cont_dist().penalize_crossing, bool) diff --git a/tests/test_distributions/test_univariate_discrete_distns.py b/tests/test_distributions/test_univariate_discrete_distns.py new file mode 100644 index 0000000..c355d80 --- /dev/null +++ b/tests/test_distributions/test_univariate_discrete_distns.py @@ -0,0 +1,40 @@ +from ..utils import BaseTestClass +import pytest + + +class TestClass(BaseTestClass): + def test_init(self, univariate_discrete_dist): + assert isinstance(univariate_discrete_dist().stabilization, str) + assert univariate_discrete_dist().stabilization is not None + with pytest.raises(ValueError, match="Invalid stabilization method."): + univariate_discrete_dist(stabilization="invalid_stabilization") + + if univariate_discrete_dist.__name__ in ["NegativeBinomial", "ZINB"]: + with pytest.raises(ValueError, match="Invalid response function for total_count."): + 
univariate_discrete_dist(response_fn_total_count="invalid_response_fn") + with pytest.raises(ValueError, match="Invalid response function for probs."): + univariate_discrete_dist(response_fn_probs="invalid_response_fn") + else: + with pytest.raises(ValueError, match="Invalid response function."): + univariate_discrete_dist(response_fn="invalid_response_fn") + + assert isinstance(univariate_discrete_dist().loss_fn, str) + assert univariate_discrete_dist().loss_fn is not None + with pytest.raises(ValueError, match="Invalid loss function."): + univariate_discrete_dist(loss_fn="invalid_loss_fn") + + def test_distribution_parameters(self, univariate_discrete_dist): + assert isinstance(univariate_discrete_dist().param_dict, dict) + assert set(univariate_discrete_dist().param_dict.keys()) == set(univariate_discrete_dist().distribution_arg_names) + assert all(callable(func) for func in univariate_discrete_dist().param_dict.values()) + assert univariate_discrete_dist().n_dist_param == len(univariate_discrete_dist().distribution_arg_names) + assert isinstance(univariate_discrete_dist().n_dist_param, int) + assert isinstance(univariate_discrete_dist().distribution_arg_names, list) + + def test_defaults(self, univariate_discrete_dist): + assert isinstance(univariate_discrete_dist().univariate, bool) + assert univariate_discrete_dist().univariate is True + assert isinstance(univariate_discrete_dist().discrete, bool) + assert univariate_discrete_dist().discrete is True + assert univariate_discrete_dist().tau is None + assert isinstance(univariate_discrete_dist().penalize_crossing, bool) diff --git a/tests/test_flow_utils/__init__.py b/tests/test_flow_utils/__init__.py new file mode 100644 index 0000000..89fa3cb --- /dev/null +++ b/tests/test_flow_utils/__init__.py @@ -0,0 +1 @@ +"""LightGBMLSS - An extension of LightGBM to probabilistic forecasting""" \ No newline at end of file diff --git a/tests/test_flow_utils/test_create_spline_flow.py b/tests/test_flow_utils/test_create_spline_flow.py new file mode 100644 index 0000000..85da2bc --- /dev/null +++ b/tests/test_flow_utils/test_create_spline_flow.py @@ -0,0 +1,11 @@ +from ..utils import BaseTestClass +from pyro.distributions import TransformedDistribution + + +class TestClass(BaseTestClass): + def test_create_spline_flow(self, flow_class): + # Create normalizing flow + gen_flow = flow_class.dist.create_spline_flow(input_dim=1) + + # Assertions + assert isinstance(gen_flow, TransformedDistribution) diff --git a/tests/test_flow_utils/test_crps_score.py b/tests/test_flow_utils/test_crps_score.py new file mode 100644 index 0000000..0ff688d --- /dev/null +++ b/tests/test_flow_utils/test_crps_score.py @@ -0,0 +1,21 @@ +from ..utils import BaseTestClass +import torch + + +class TestClass(BaseTestClass): + def test_crps_score(self, flow_class): + # Create data for testing + torch.manual_seed(123) + n_obs = 10 + n_samples = 20 + y = torch.rand(n_obs, 1) + yhat_dist = torch.rand(n_samples, n_obs) + + # Call the function + loss = flow_class.dist.crps_score(y, yhat_dist) + + # Assertions + assert isinstance(loss, torch.Tensor) + assert not torch.isnan(loss).any() + assert not torch.isinf(loss).any() + assert loss.shape == y.shape diff --git a/tests/test_flow_utils/test_replace_parameters.py b/tests/test_flow_utils/test_replace_parameters.py new file mode 100644 index 0000000..d02f0ff --- /dev/null +++ b/tests/test_flow_utils/test_replace_parameters.py @@ -0,0 +1,24 @@ +from ..utils import BaseTestClass +from pyro.distributions import TransformedDistribution +from 
typing import List +import numpy as np +import torch + + +class TestClass(BaseTestClass): + def test_replace_parameters(self, flow_class): + # Specify Normalizing Flow + predt = np.array([0.5 for _ in range(flow_class.dist.n_dist_param)]).reshape(-1, 1).T + predt = torch.tensor(predt, dtype=torch.float32) + flow_dist = flow_class.dist.create_spline_flow(input_dim=1) + + # Call the function + params, flow_dist = flow_class.dist.replace_parameters(predt, flow_dist) + + # Assertions + assert isinstance(flow_dist, TransformedDistribution) + assert isinstance(params, List) + for i in range(len(params)): + assert isinstance(params[i], torch.Tensor) + assert not torch.isnan(params[i]).any() + assert not torch.isinf(params[i]).any() diff --git a/tests/test_model/__init__.py b/tests/test_model/__init__.py new file mode 100644 index 0000000..89fa3cb --- /dev/null +++ b/tests/test_model/__init__.py @@ -0,0 +1 @@ +"""LightGBMLSS - An extension of LightGBM to probabilistic forecasting""" \ No newline at end of file diff --git a/tests/test_model/test_model.py b/tests/test_model/test_model.py new file mode 100644 index 0000000..8b7101b --- /dev/null +++ b/tests/test_model/test_model.py @@ -0,0 +1,191 @@ +from lightgbmlss.model import * +from lightgbmlss.distributions.Gaussian import * +from lightgbmlss.distributions.Expectile import * +from lightgbmlss.datasets.data_loader import load_simulated_gaussian_data +import pytest +from pytest import approx + + +@pytest.fixture +def univariate_data(): + train, test = load_simulated_gaussian_data() + X_train, y_train = train.filter(regex="x"), train["y"].values + X_test, y_test = test.filter(regex="x"), test["y"].values + dtrain = lgb.Dataset(X_train, label=y_train) + dtest = lgb.Dataset(X_test) + deval = lgb.Dataset(X_test, label=y_test) + + return dtrain, dtest, deval, X_test + + +@pytest.fixture +def univariate_lgblss(): + return LightGBMLSS(Gaussian()) + + +@pytest.fixture +def expectile_lgblss(): + return LightGBMLSS(Expectile()) + + +@pytest.fixture +def univariate_params(): + opt_params = { + "eta": 0.06554395841226755, + "max_depth": 3, + "num_leaves": 255, + "min_data_in_leaf": 20, + "min_gain_to_split": 5.76808477078835, + "min_sum_hessian_in_leaf": 9.446692680480123e-05, + "subsample": 0.3022671193739115, + "feature_fraction": 0.7869489723419915, + "boosting": "gbdt" + } + n_rounds = 46 + + return opt_params, n_rounds + + +@pytest.fixture +def expectile_params(): + opt_params = { + "eta": 0.669098091972402, + "max_depth": 2, + "num_leaves": 255, + "min_data_in_leaf": 20, + "min_gain_to_split": 33.016324935465434, + "min_sum_hessian_in_leaf": 60.4377077445418, + "subsample": 0.8748337817075426, + "feature_fraction": 0.9497140456000938, + "boosting": "gbdt" + } + n_rounds = 2 + + return opt_params, n_rounds + + +class TestClass: + def test_model_univ_train(self, univariate_data, univariate_lgblss, univariate_params): + # Unpack + dtrain, _, _, _ = univariate_data + opt_params, n_rounds = univariate_params + lgblss = univariate_lgblss + + # Train the model + lgblss.train(opt_params, dtrain, n_rounds) + + # Assertions + assert isinstance(lgblss.booster, lgb.Booster) + + def test_model_univ_train_eval(self, univariate_data, univariate_lgblss, univariate_params): + # Unpack + dtrain, dtest, deval, _ = univariate_data + opt_params, n_rounds = univariate_params + lgblss = univariate_lgblss + + # Add evaluation set + valid_sets = [dtrain, deval] + valid_names = ["train", "evaluation"] + + # Train the model + lgblss.train(opt_params, dtrain, n_rounds,
valid_sets=valid_sets, valid_names=valid_names) + + # Assertions + assert isinstance(lgblss.booster, lgb.Booster) + + def test_model_hpo(self, univariate_data, univariate_lgblss,): + # Unpack + dtrain, _, _, _ = univariate_data + lgblss = univariate_lgblss + + # Create hyperparameter dictionary + param_dict = { + "eta": ["float", {"low": 1e-5, "high": 1, "log": True}], + "max_depth": ["int", {"low": 1, "high": 2, "log": False}], + "device_type": ["categorical", ["cpu"]], + } + + # Train the model + np.random.seed(123) + opt_param = lgblss.hyper_opt( + param_dict, + dtrain, + num_boost_round=10, + nfold=5, + early_stopping_rounds=20, + max_minutes=10, + n_trials=5, + silence=True, + seed=123, + hp_seed=123 + ) + + # Assertions + assert isinstance(opt_param, dict) + + def test_model_predict(self, univariate_data, univariate_lgblss, univariate_params): + # Unpack + dtrain, _, _, X_test = univariate_data + opt_params, n_rounds = univariate_params + lgblss = univariate_lgblss + + # Train the model + lgblss.train(opt_params, dtrain, n_rounds) + + # Call the predict method + n_samples = 100 + quantiles = [0.1, 0.5, 0.9] + + pred_params = lgblss.predict(X_test, pred_type="parameters") + pred_samples = lgblss.predict(X_test, pred_type="samples", n_samples=n_samples) + pred_quantiles = lgblss.predict(X_test, pred_type="quantiles", quantiles=quantiles) + + # Assertions + assert isinstance(pred_params, (pd.DataFrame, type(None))) + assert not pred_params.isna().any().any() + assert not np.isinf(pred_params).any().any() + assert pred_params.shape[1] == lgblss.dist.n_dist_param + assert approx(pred_params["loc"].mean(), abs=0.2) == 10.0 + + assert isinstance(pred_samples, (pd.DataFrame, type(None))) + assert not pred_samples.isna().any().any() + assert not np.isinf(pred_samples).any().any() + assert pred_samples.shape[1] == n_samples + + assert isinstance(pred_quantiles, (pd.DataFrame, type(None))) + assert not pred_quantiles.isna().any().any() + assert not np.isinf(pred_quantiles).any().any() + assert pred_quantiles.shape[1] == len(quantiles) + + def test_model_plot(self, univariate_data, univariate_lgblss, univariate_params): + # Unpack + dtrain, dtest, _, X_test = univariate_data + opt_params, n_rounds = univariate_params + lgblss = univariate_lgblss + + # Train the model + lgblss.train(opt_params, dtrain, n_rounds) + + # Call the function + lgblss.plot(X_test, parameter="scale", feature="x_true", plot_type="Partial_Dependence") + lgblss.plot(X_test, parameter="scale", feature="x_true", plot_type="Feature_Importance") + + def test_model_expectile_plot(self, univariate_data, expectile_lgblss, expectile_params): + # Unpack + dtrain, dtest, _, X_test = univariate_data + opt_params, n_rounds = expectile_params + lgblss_expectile = expectile_lgblss + + # Train the model + lgblss_expectile.train(opt_params, dtrain, n_rounds) + + # Call the function + lgblss_expectile.expectile_plot(X_test, + expectile="expectile_0.9", + feature="x_true", + plot_type="Partial_Dependence") + + lgblss_expectile.expectile_plot(X_test, + expectile="expectile_0.9", + feature="x_true", + plot_type="Feature_Importance") diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py new file mode 100644 index 0000000..89fa3cb --- /dev/null +++ b/tests/test_utils/__init__.py @@ -0,0 +1 @@ +"""LightGBMLSS - An extension of LightGBM to probabilistic forecasting""" \ No newline at end of file diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py new file mode 100644 index 0000000..cc4c5d1 --- /dev/null 
+++ b/tests/test_utils/test_utils.py @@ -0,0 +1,31 @@ +import pytest +import torch +from lightgbmlss import utils + + +def get_response_fn(): + functions_list = [fn for fn in dir(utils) if "_fn" in fn] + + func_list = [] + for func_name in functions_list: + func_list.append(getattr(utils, func_name)) + + return func_list + + +class TestClass: + @pytest.fixture(params=get_response_fn()) + def response_fn(self, request): + return request.param + + def test_response_fn(self, response_fn): + # Create data for testing + predt = torch.tensor([1.0, 2.0, 3.0, 4.0]) + + # Call the function + predt_transformed = response_fn(predt) + + # Assertions + assert isinstance(predt_transformed, torch.Tensor) + assert not torch.isnan(predt_transformed).any() + assert not torch.isinf(predt_transformed).any() diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..24ad2ef --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,258 @@ +from lightgbmlss.model import LightGBMLSS +from lightgbmlss import distributions +import pytest +import importlib +from typing import List +import torch +import numpy as np +import lightgbm as lgb + + +def gen_test_data(dist_class, weights: bool = False): + """ + Function that generates test data for a given distribution class. + + Arguments: + ---------- + dist_class (class): + Distribution class. + weights (bool): + Whether to generate weights. + + Returns: + -------- + predt (np.ndarray): + Predictions. + labels (np.ndarray): + Labels. + weights (np.ndarray): + Weights (only returned if weights=True). + dmatrix (lgb.Dataset): + LightGBM Dataset. + """ + if dist_class.dist.univariate: + np.random.seed(123) + predt = np.random.rand(dist_class.dist.n_dist_param * 4).reshape(-1, dist_class.dist.n_dist_param) + labels = np.array([0.2, 0.4, 0.6, 0.8]).reshape(-1, 1) + if weights: + weights = np.ones_like(labels) + dmatrix = lgb.Dataset(predt, label=labels, weight=weights) + dist_class.set_init_score(dmatrix) + + return predt, labels, weights, dmatrix + else: + dmatrix = lgb.Dataset(predt, label=labels) + dist_class.set_init_score(dmatrix) + + return predt, labels, dmatrix + else: + np.random.seed(123) + predt = np.random.rand(dist_class.dist.n_dist_param * 4).reshape(-1, dist_class.dist.n_dist_param) + labels = np.arange(0.1, 0.9, 0.1) + labels = dist_class.dist.target_append( + labels, + dist_class.dist.n_targets, + dist_class.dist.n_dist_param + ) + if weights: + weights = np.ones_like(labels[:, 0], dtype=labels.dtype).reshape(-1, 1) + dmatrix = lgb.Dataset(predt, label=labels, weight=weights) + dist_class.set_init_score(dmatrix) + + return predt, labels, weights, dmatrix + else: + dmatrix = lgb.Dataset(predt, label=labels) + dist_class.set_init_score(dmatrix) + + return predt, labels, dmatrix + + +def get_distribution_classes(univariate: bool = True, + continuous: bool = False, + discrete: bool = False, + rsample: bool = False, + flow: bool = False, + expectile: bool = False, + ) -> List: + """ + Function that returns a list of specified distribution classes. + + Arguments: + --------- + univariate (bool): + If True, only return distribution classes that are univariate. + continuous (bool): + If True, only return distribution classes that are continuous. + discrete (bool): + If True, only return distribution classes that are discrete. + rsample (bool): + If True, only return distribution classes that have a rsample method. + flow (bool): + If True, only return distribution classes that are Flows. + expectile (bool): + If True, only return the Expectile distribution class.
+ + Returns: + -------- + distribution_classes (List): + List of the distribution classes selected by the arguments above. + """ + # Get all distribution names + distns = [dist for dist in dir(distributions) if dist[0].isupper()] + + # Remove SplineFlow from distns + distns.remove("SplineFlow") + + # Remove Expectile from distns + distns.remove("Expectile") + + # Extract all continuous univariate distributions + univar_cont_distns = [] + for distribution_name in distns: + # Import the module dynamically + module = importlib.import_module(f"lightgbmlss.distributions.{distribution_name}") + + # Get the class dynamically from the module + distribution_class = getattr(module, distribution_name) + + if distribution_class().univariate and not distribution_class().discrete: + univar_cont_distns.append(distribution_class) + + # Extract discrete univariate distributions only + univar_discrete_distns = [] + for distribution_name in distns: + # Import the module dynamically + module = importlib.import_module(f"lightgbmlss.distributions.{distribution_name}") + + # Get the class dynamically from the module + distribution_class = getattr(module, distribution_name) + + if distribution_class().univariate and distribution_class().discrete: + univar_discrete_distns.append(distribution_class) + + # Extract all multivariate distributions + multivar_distns = [] + for distribution_name in distns: + # Import the module dynamically + module = importlib.import_module(f"lightgbmlss.distributions.{distribution_name}") + + # Get the class dynamically from the module + distribution_class = getattr(module, distribution_name) + + if not distribution_class().univariate: + multivar_distns.append(distribution_class) + + # Extract only those distributions that have a rsample method + rsample_distns = [] + for distribution_name in distns: + # Import the module dynamically + module = importlib.import_module(f"lightgbmlss.distributions.{distribution_name}") + + # Get the class dynamically from the module + distribution_class = getattr(module, distribution_name) + + # Create an instance of the distribution class + dist_class = LightGBMLSS(distribution_class()) + params = torch.tensor([0.5 for _ in range(dist_class.dist.n_dist_param)]) + + # Check if the distribution is univariate and has a rsample method + if distribution_class().univariate and dist_class.dist.tau is None: + dist_kwargs = dict(zip(dist_class.dist.distribution_arg_names, params)) + dist_fit = dist_class.dist.distribution(**dist_kwargs) + + elif distribution_class().univariate and dist_class.dist.tau is not None: + dist_fit = dist_class.dist.distribution(params) + + try: + dist_fit.rsample() + if distribution_class().univariate: + rsample_distns.append(distribution_class) + except NotImplementedError: + pass + + if univariate and not flow and not expectile: + if discrete: + return univar_discrete_distns + elif rsample: + return rsample_distns + else: + return univar_cont_distns + + elif not univariate and not flow and not expectile: + return multivar_distns + + elif flow: + distribution_name = "SplineFlow" + module = importlib.import_module(f"lightgbmlss.distributions.{distribution_name}") + # Get the class dynamically from the module + distribution_class = [getattr(module, distribution_name)] + + return distribution_class + + elif expectile: + distribution_name = "Expectile" + module = importlib.import_module(f"lightgbmlss.distributions.{distribution_name}") + # Get the class dynamically from the module + distribution_class =
[getattr(module, distribution_name)] + + return distribution_class + + +class BaseTestClass: + @pytest.fixture(params=get_distribution_classes(continuous=True)) + def univariate_cont_dist(self, request): + return request.param + + @pytest.fixture(params=get_distribution_classes(discrete=True)) + def univariate_discrete_dist(self, request): + return request.param + + @pytest.fixture(params=get_distribution_classes(univariate=False)) + def multivariate_dist(self, request): + return request.param + + @pytest.fixture(params=get_distribution_classes(flow=True)) + def flow_dist(self, request): + return request.param + + @pytest.fixture(params=get_distribution_classes(expectile=True)) + def expectile_dist(self, request): + return request.param + + @pytest.fixture(params= + get_distribution_classes() + + get_distribution_classes(discrete=True) + + get_distribution_classes(expectile=True) + + get_distribution_classes(flow=True) + + get_distribution_classes(univariate=False) + ) + def dist_class(self, request): + return LightGBMLSS(request.param()) + + @pytest.fixture(params=get_distribution_classes(flow=True)) + def flow_class(self, request): + return LightGBMLSS(request.param()) + + @pytest.fixture(params=get_distribution_classes(univariate=False)) + def multivariate_class(self, request): + return LightGBMLSS(request.param()) + + @pytest.fixture(params=get_distribution_classes(rsample=True)) + def dist_class_crps(self, request): + return LightGBMLSS(request.param()) + + @pytest.fixture(params=["nll"]) + def loss_fn(self, request): + return request.param + + @pytest.fixture(params=["None", "MAD", "L2"]) + def stabilization(self, request): + return request.param + + @pytest.fixture(params=[True, False]) + def requires_grad(self, request): + return request.param + + @pytest.fixture(params=["samples", "quantiles", "parameters", "expectiles"]) + def pred_type(self, request): + return request.param
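
For context on extending this suite: BaseTestClass is the extension point, and its fixtures re-run a test once per distribution class collected by get_distribution_classes(). A minimal sketch of a new test module (the test name and assertion are illustrative only, not part of this patch):

from ..utils import BaseTestClass


class TestClass(BaseTestClass):
    def test_has_distribution_parameters(self, dist_class):
        # dist_class is a LightGBMLSS instance wrapping one distribution;
        # the fixture parametrization repeats this test for every class
        # returned by get_distribution_classes().
        assert dist_class.dist.n_dist_param >= 1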