Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions mapie/aggregation_functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Callable, Optional
from typing import Callable, Optional, cast

import numpy as np

Expand Down Expand Up @@ -113,7 +113,7 @@ def aggregate_all(agg_function: Optional[str], X: NDArray) -> NDArray:

"""
if agg_function == "median":
return np.nanmedian(X, axis=1)
return cast(NDArray, np.nanmedian(X, axis=1))
elif agg_function == "mean":
return np.nanmean(X, axis=1)
return cast(NDArray, np.nanmean(X, axis=1))
raise ValueError("Aggregation function called but not defined.")
2 changes: 1 addition & 1 deletion mapie/calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,4 +548,4 @@ def predict(
The class from the scores.
"""
check_is_fitted(self)
return self.single_estimator_.predict(X)
return cast(NDArray, self.single_estimator_.predict(X))
2 changes: 1 addition & 1 deletion mapie/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,7 +1061,7 @@ def predict(
y_pred_proba = check_proba_normalized(y_pred_proba, axis=1)
y_pred = self.label_encoder_.inverse_transform(np.argmax(y_pred_proba, axis=1))
if alpha is None:
return y_pred
return cast(NDArray, y_pred)

# Estimate of probabilities from estimator(s)
# In all cases: len(y_pred_proba.shape) == 3
Expand Down
4 changes: 2 additions & 2 deletions mapie/conformity_scores/bounds/residuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def _fit_residual_estimator(
X: NDArray,
y: NDArray,
y_pred: NDArray,
) -> Tuple[NDArray, NDArray]:
) -> RegressorMixin:
"""
Fit the residual estimator and return the indexes used for the
training of the base estimator and those needed for the conformalization.
Expand Down Expand Up @@ -225,7 +225,7 @@ def _predict_residual_estimator(self, X: ArrayLike) -> NDArray:
+ "the residuals and his predict method should return "
+ "the exponential of the predictions."
)
return pred
return cast(NDArray, pred)

def get_signed_conformity_scores(
self, y: ArrayLike, y_pred: ArrayLike, X: Optional[ArrayLike] = None, **kwargs
Expand Down
4 changes: 2 additions & 2 deletions mapie/conformity_scores/sets/aps.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def _compute_v_parameter(
v_param = (
y_proba_last_cumsumed - threshold.reshape(1, -1)
) / y_pred_proba_last[:, 0, :]
return v_param
return cast(NDArray, v_param)

def _add_random_tie_breaking(
self,
Expand Down Expand Up @@ -421,4 +421,4 @@ def get_prediction_sets(
EPSILON,
)

return prediction_sets
return cast(NDArray, prediction_sets)
2 changes: 1 addition & 1 deletion mapie/conformity_scores/sets/lac.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,4 +217,4 @@ def get_prediction_sets(
axis=2,
)

return prediction_sets
return cast(NDArray, prediction_sets)
4 changes: 2 additions & 2 deletions mapie/conformity_scores/sets/naive.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Tuple, Union
from typing import Optional, Tuple, Union, cast

import numpy as np
from numpy.typing import NDArray
Expand Down Expand Up @@ -242,4 +242,4 @@ def get_prediction_sets(
# get the prediction set by taking all probabilities above the last one
prediction_sets = np.greater_equal(y_pred_proba - y_pred_proba_last, -EPSILON)

return prediction_sets
return cast(NDArray, prediction_sets)
2 changes: 1 addition & 1 deletion mapie/conformity_scores/sets/raps.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,4 +507,4 @@ def _compute_v_parameter(
- self.lambda_star * np.maximum(0, L - self.k_star)
+ self.lambda_star * (L > self.k_star)
)
return v_param
return cast(NDArray, v_param)
2 changes: 1 addition & 1 deletion mapie/conformity_scores/sets/topk.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,4 +186,4 @@ def get_prediction_sets(
y_pred_proba[:, :, np.newaxis] - y_pred_proba_last, -EPSILON
)

return prediction_sets
return cast(NDArray, prediction_sets)
4 changes: 2 additions & 2 deletions mapie/conformity_scores/sets/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Union
from typing import Optional, Union, cast
import numpy as np

from numpy.typing import NDArray
Expand Down Expand Up @@ -121,4 +121,4 @@ def get_last_index_included(
),
axis=1,
)
return y_pred_index_last[:, np.newaxis, :]
return cast(NDArray, y_pred_index_last[:, np.newaxis, :])
2 changes: 1 addition & 1 deletion mapie/estimator/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def _predict_proba_oof_estimator(
y_pred_proba = _fix_number_of_classes(
self.n_classes, estimator.classes_, y_pred_proba
)
return y_pred_proba
return cast(NDArray, y_pred_proba)

def _predict_proba_calib_oof_estimator(
self,
Expand Down
10 changes: 5 additions & 5 deletions mapie/estimator/regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,15 +287,15 @@ def _aggregate_with_mask(self, x: NDArray, k: NDArray) -> NDArray:
if self.method in self.no_agg_methods_ or self.use_split_method_:
raise ValueError("There should not be aggregation of predictions.")
elif self.agg_function == "median":
return phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1))
return cast(NDArray, phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1)))
# To aggregate with mean() the aggregation could be done
# with phi2D(A=x, B=k, fun=lambda x: np.nanmean(x, axis=1)).
# However, phi2D contains a np.apply_along_axis loop which
# is much slower than the matrix multiplication that can
# be used to compute the means.
elif self.agg_function in ["mean", None]:
K = np.nan_to_num(k, nan=0.0)
return np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T)
return cast(NDArray, np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T))
else:
raise ValueError("The value of the aggregation function is not correct")

Expand Down Expand Up @@ -394,7 +394,7 @@ def predict_calib(
_check_nan_in_aposteriori_prediction(pred_matrix)
y_pred = aggregate_all(self.agg_function, pred_matrix)

return y_pred
return cast(NDArray, y_pred)

def fit(
self,
Expand Down Expand Up @@ -574,7 +574,7 @@ def predict(

y_pred = self.single_estimator_.predict(X, **predict_params)
if not return_multi_pred and not ensemble:
return y_pred
return cast(NDArray, y_pred)

if self.method in self.no_agg_methods_ or self.use_split_method_:
y_pred_multi_low = y_pred[:, np.newaxis]
Expand Down Expand Up @@ -602,4 +602,4 @@ def predict(
if return_multi_pred:
return y_pred, y_pred_multi_low, y_pred_multi_up
else:
return y_pred
return cast(NDArray, y_pred)
18 changes: 9 additions & 9 deletions mapie/metrics/calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def expected_calibration_error(
y_true_, y_score, num_bins, split_strategy
)

return np.divide(
np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes)
return float(
np.divide(np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes))
)


Expand Down Expand Up @@ -207,7 +207,7 @@ def add_jitter(
random_state_np = check_random_state(random_state)
noise = noise_amplitude * random_state_np.normal(size=n)
x_jittered = x * (1 + noise)
return x_jittered
return cast(NDArray, x_jittered)


def sort_xy_by_y(x: NDArray, y: NDArray) -> Tuple[NDArray, NDArray]:
Expand Down Expand Up @@ -311,7 +311,7 @@ def cumulative_differences(
)
y_true_sorted, y_score_sorted = sort_xy_by_y(y_true, y_score_jittered)
cumulative_differences = np.cumsum(y_true_sorted - y_score_sorted) / n
return cumulative_differences
return cast(NDArray, cumulative_differences)


def length_scale(s: NDArray) -> float:
Expand Down Expand Up @@ -348,7 +348,7 @@ def length_scale(s: NDArray) -> float:
"""
n = len(s)
length_scale = np.sqrt(np.sum(s * (1 - s))) / n
return length_scale
return float(length_scale)


def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float:
Expand Down Expand Up @@ -403,7 +403,7 @@ def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float:
cum_diff = cumulative_differences(y_true, y_score)
sigma = length_scale(y_score)
ks_stat = np.max(np.abs(cum_diff)) / sigma
return ks_stat
return float(ks_stat)


def kolmogorov_smirnov_cdf(x: float) -> float:
Expand Down Expand Up @@ -561,7 +561,7 @@ def kuiper_statistic(y_true: NDArray, y_score: NDArray) -> float:
cum_diff = cumulative_differences(y_true, y_score)
sigma = length_scale(y_score)
ku_stat = (np.max(cum_diff) - np.min(cum_diff)) / sigma # type: ignore
return ku_stat
return float(ku_stat)


def kuiper_cdf(x: float) -> float:
Expand Down Expand Up @@ -727,7 +727,7 @@ def spiegelhalter_statistic(y_true: NDArray, y_score: NDArray) -> float:
numerator: float = np.sum((y_true - y_score) * (1 - 2 * y_score))
denominator = np.sqrt(np.sum((1 - 2 * y_score) ** 2 * y_score * (1 - y_score)))
sp_stat = numerator / denominator
return sp_stat
return float(sp_stat)


def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float:
Expand Down Expand Up @@ -775,4 +775,4 @@ def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float:
_check_array_inf(y_score)
sp_stat = spiegelhalter_statistic(y_true, y_score)
sp_p_value = 1 - scipy.stats.norm.cdf(sp_stat)
return sp_p_value
return float(sp_p_value)
10 changes: 6 additions & 4 deletions mapie/metrics/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)


def classification_mean_width_score(y_pred_set: ArrayLike) -> float:
def classification_mean_width_score(y_pred_set: ArrayLike) -> NDArray:
"""
Mean width of prediction set output by
:class:`~mapie.classification._MapieClassifier`.
Expand Down Expand Up @@ -48,7 +48,7 @@ def classification_mean_width_score(y_pred_set: ArrayLike) -> float:
_check_array_inf(y_pred_set)
width = y_pred_set.sum(axis=1)
mean_width = width.mean(axis=0)
return mean_width
return cast(NDArray, mean_width)


def classification_coverage_score(y_true: NDArray, y_pred_set: NDArray) -> NDArray:
Expand Down Expand Up @@ -119,7 +119,7 @@ def classification_coverage_score(y_true: NDArray, y_pred_set: NDArray) -> NDArr
y_true = np.expand_dims(y_true, axis=1)
y_true = np.expand_dims(y_true, axis=1)
coverage = np.nanmean(np.take_along_axis(y_pred_set, y_true, axis=1), axis=0)
return coverage[0]
return cast(NDArray, coverage[0])


def classification_ssc(
Expand Down Expand Up @@ -243,4 +243,6 @@ def classification_ssc_score(
_check_array_nan(y_pred_set)
_check_array_inf(y_pred_set)

return np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1)
return cast(
NDArray, np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1)
)
12 changes: 6 additions & 6 deletions mapie/metrics/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def regression_mean_width_score(y_intervals: NDArray) -> NDArray:

width = np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :])
mean_width = width.mean(axis=0)
return mean_width
return cast(NDArray, mean_width)


def regression_coverage_score(
Expand Down Expand Up @@ -129,7 +129,7 @@ def regression_coverage_score(
),
axis=0,
)
return coverages
return cast(NDArray, coverages)


def regression_ssc(y_true: NDArray, y_intervals: NDArray, num_bins: int = 3) -> NDArray:
Expand Down Expand Up @@ -247,7 +247,7 @@ def regression_ssc_score(
>>> print(regression_ssc_score(y_true, y_intervals, num_bins=2))
[1. 0.5]
"""
return np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1)
return cast(NDArray, np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1))


def _gaussian_kernel(x: NDArray, kernel_size: int) -> NDArray:
Expand All @@ -265,7 +265,7 @@ def _gaussian_kernel(x: NDArray, kernel_size: int) -> NDArray:
dist = (
-2 * np.matmul(x, x.transpose((0, 2, 1))) + norm_x + norm_x.transpose((0, 2, 1))
)
return np.exp(-dist / kernel_size)
return cast(NDArray, np.exp(-dist / kernel_size))


def hsic(
Expand Down Expand Up @@ -358,7 +358,7 @@ def hsic(
hsic_mat /= (n_samples - 1) ** 2
coef_hsic = np.sqrt(np.matrix.trace(hsic_mat, axis1=1, axis2=2))

return coef_hsic
return cast(NDArray, coef_hsic)


def coverage_width_based(
Expand Down Expand Up @@ -544,5 +544,5 @@ def regression_mwi_score(
error_above: float = np.sum((y_true - y_pred_up)[y_true > y_pred_up])
error_below: float = np.sum((y_pred_low - y_true)[y_true < y_pred_low])
total_error = error_above + error_below
mwi = (width + total_error * 2 / (1 - confidence_level)) / len(y_true)
mwi = float((width + total_error * 2 / (1 - confidence_level)) / len(y_true))
return mwi
13 changes: 8 additions & 5 deletions mapie/risk_control/binary_classification.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import warnings
from typing import Any, Callable, List, Literal, Optional, Tuple, Union
from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast

import numpy as np
from numpy.typing import ArrayLike, NDArray
Expand Down Expand Up @@ -304,10 +304,13 @@ def predict(self, X_test: ArrayLike) -> NDArray:
"Either you forgot to calibrate the controller first, "
"or calibration was not successful."
)
return self._get_predictions_per_param(
X_test,
np.array([self.best_predict_param]),
)[0]
return cast(
NDArray,
self._get_predictions_per_param(
X_test,
np.array([self.best_predict_param]),
)[0],
)

def _set_best_predict_param_choice(
self,
Expand Down
4 changes: 2 additions & 2 deletions mapie/risk_control/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def find_best_predict_param(
best_predict_param = lambdas[
np.argmin(-np.greater_equal(bound_rep, alphas_np).astype(int), axis=1)
]
return best_predict_param
return cast(NDArray, best_predict_param)


def ltt_procedure(
Expand Down Expand Up @@ -364,7 +364,7 @@ def _h1(r_hats: NDArray, alphas: NDArray) -> NDArray:
mask = r_hats != 0
elt1[mask] = r_hats[mask] * np.log(r_hats[mask] / alphas[mask])
elt2 = (1 - r_hats) * np.log((1 - r_hats) / (1 - alphas))
return elt1 + elt2
return cast(NDArray, elt1 + elt2)


def find_precision_best_predict_param(
Expand Down
6 changes: 3 additions & 3 deletions mapie/risk_control/risks.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def compute_risk_recall(lambdas: NDArray, y_pred_proba: NDArray, y: NDArray) ->

y_repeat = np.repeat(y[..., np.newaxis], n_lambdas, axis=2)
risks = 1 - (_true_positive(y_pred_th, y_repeat) / y.sum(axis=1)[:, np.newaxis])
return risks
return cast(NDArray, risks)


def compute_risk_precision(
Expand Down Expand Up @@ -101,7 +101,7 @@ def compute_risk_precision(
risks = 1 - _true_positive(y_pred_th, y_repeat) / y_pred_th.sum(axis=1)
risks[np.isnan(risks)] = 1 # NaN values indicate high risk.

return risks
return cast(NDArray, risks)


def _true_positive(y_pred_th: NDArray, y_repeat: NDArray) -> NDArray:
Expand All @@ -122,7 +122,7 @@ def _true_positive(y_pred_th: NDArray, y_repeat: NDArray) -> NDArray:
The number of true positive.
"""
tp = (y_pred_th * y_repeat).sum(axis=1)
return tp
return cast(NDArray, tp)


class BinaryClassificationRisk:
Expand Down
Loading