From 36137dad9d888d8972fc21300a4cbc578b643332 Mon Sep 17 00:00:00 2001 From: VishnoiAman777 Date: Thu, 11 Dec 2025 10:43:14 +0530 Subject: [PATCH 1/3] added warning when Any type is returned from the function call and fixed return types --- mapie/conformity_scores/bounds/residuals.py | 2 +- mapie/metrics/calibration.py | 12 ++++++------ mapie/metrics/classification.py | 4 ++-- mapie/metrics/regression.py | 2 +- mapie/utils.py | 2 +- mypy.ini | 1 + 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/mapie/conformity_scores/bounds/residuals.py b/mapie/conformity_scores/bounds/residuals.py index 9b883ff56..fbed94fc6 100644 --- a/mapie/conformity_scores/bounds/residuals.py +++ b/mapie/conformity_scores/bounds/residuals.py @@ -166,7 +166,7 @@ def _fit_residual_estimator( X: NDArray, y: NDArray, y_pred: NDArray, - ) -> Tuple[NDArray, NDArray]: + ) -> RegressorMixin: """ Fit the residual estimator and returns the indexes used for the training of the base estimator and those needed for the conformalization. 
diff --git a/mapie/metrics/calibration.py b/mapie/metrics/calibration.py index ec1f10db7..df8829425 100644 --- a/mapie/metrics/calibration.py +++ b/mapie/metrics/calibration.py @@ -73,9 +73,9 @@ def expected_calibration_error( y_true_, y_score, num_bins, split_strategy ) - return np.divide( + return float(np.divide( np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes) - ) + )) def top_label_ece( @@ -348,7 +348,7 @@ def length_scale(s: NDArray) -> float: """ n = len(s) length_scale = np.sqrt(np.sum(s * (1 - s))) / n - return length_scale + return float(length_scale) def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float: @@ -561,7 +561,7 @@ def kuiper_statistic(y_true: NDArray, y_score: NDArray) -> float: cum_diff = cumulative_differences(y_true, y_score) sigma = length_scale(y_score) ku_stat = (np.max(cum_diff) - np.min(cum_diff)) / sigma # type: ignore - return ku_stat + return float(ku_stat) def kuiper_cdf(x: float) -> float: @@ -727,7 +727,7 @@ def spiegelhalter_statistic(y_true: NDArray, y_score: NDArray) -> float: numerator: float = np.sum((y_true - y_score) * (1 - 2 * y_score)) denominator = np.sqrt(np.sum((1 - 2 * y_score) ** 2 * y_score * (1 - y_score))) sp_stat = numerator / denominator - return sp_stat + return float(sp_stat) def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float: @@ -775,4 +775,4 @@ def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float: _check_array_inf(y_score) sp_stat = spiegelhalter_statistic(y_true, y_score) sp_p_value = 1 - scipy.stats.norm.cdf(sp_stat) - return sp_p_value + return float(sp_p_value) diff --git a/mapie/metrics/classification.py b/mapie/metrics/classification.py index a9e43bac4..9a1f70b39 100644 --- a/mapie/metrics/classification.py +++ b/mapie/metrics/classification.py @@ -14,7 +14,7 @@ ) -def classification_mean_width_score(y_pred_set: ArrayLike) -> float: +def classification_mean_width_score(y_pred_set: ArrayLike) -> NDArray: """ Mean width of 
prediction set output by :class:`~mapie.classification._MapieClassifier`. @@ -48,7 +48,7 @@ def classification_mean_width_score(y_pred_set: ArrayLike) -> float: _check_array_inf(y_pred_set) width = y_pred_set.sum(axis=1) mean_width = width.mean(axis=0) - return mean_width + return cast(NDArray, mean_width) def classification_coverage_score(y_true: NDArray, y_pred_set: NDArray) -> NDArray: diff --git a/mapie/metrics/regression.py b/mapie/metrics/regression.py index 47d2a26c2..aa4bc8dc4 100644 --- a/mapie/metrics/regression.py +++ b/mapie/metrics/regression.py @@ -544,5 +544,5 @@ def regression_mwi_score( error_above: float = np.sum((y_true - y_pred_up)[y_true > y_pred_up]) error_below: float = np.sum((y_pred_low - y_true)[y_true < y_pred_low]) total_error = error_above + error_below - mwi = (width + total_error * 2 / (1 - confidence_level)) / len(y_true) + mwi = float((width + total_error * 2 / (1 - confidence_level)) / len(y_true)) return mwi diff --git a/mapie/utils.py b/mapie/utils.py index 5182a06c8..ec0256391 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -428,7 +428,7 @@ def _check_no_agg_cv( elif isinstance(cv, int): return cv == 1 elif hasattr(cv, "get_n_splits"): - return cv.get_n_splits(X, y, groups) == 1 + return bool(cv.get_n_splits(X, y, groups) == 1) else: raise ValueError( "Invalid cv argument. 
" diff --git a/mypy.ini b/mypy.ini index 358fc124d..7116c71dd 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,6 +1,7 @@ [mypy] python_version = 3.9 ignore_missing_imports = True +warn_return_any = True [mypy-sklearn.*] ignore_errors = True From e7b69fc7c610cdac04735ea45444e48dd235cf47 Mon Sep 17 00:00:00 2001 From: VishnoiAman777 Date: Thu, 11 Dec 2025 10:58:10 +0530 Subject: [PATCH 2/3] Fixed formatting issues --- mapie/metrics/calibration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mapie/metrics/calibration.py b/mapie/metrics/calibration.py index df8829425..d5f2c9bd5 100644 --- a/mapie/metrics/calibration.py +++ b/mapie/metrics/calibration.py @@ -73,9 +73,9 @@ def expected_calibration_error( y_true_, y_score, num_bins, split_strategy ) - return float(np.divide( - np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes) - )) + return float( + np.divide(np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes)) + ) def top_label_ece( From eade17dc2e680a240afbe310016e7a6692ebdbdf Mon Sep 17 00:00:00 2001 From: VishnoiAman777 Date: Thu, 11 Dec 2025 12:32:26 +0530 Subject: [PATCH 3/3] Fixed type-checks with respect to environment in ci/cd --- mapie/aggregation_functions.py | 6 +++--- mapie/calibration.py | 2 +- mapie/classification.py | 2 +- mapie/conformity_scores/bounds/residuals.py | 2 +- mapie/conformity_scores/sets/aps.py | 4 ++-- mapie/conformity_scores/sets/lac.py | 2 +- mapie/conformity_scores/sets/naive.py | 4 ++-- mapie/conformity_scores/sets/raps.py | 2 +- mapie/conformity_scores/sets/topk.py | 2 +- mapie/conformity_scores/sets/utils.py | 4 ++-- mapie/estimator/classifier.py | 2 +- mapie/estimator/regressor.py | 10 +++++----- mapie/metrics/calibration.py | 6 +++--- mapie/metrics/classification.py | 6 ++++-- mapie/metrics/regression.py | 10 +++++----- mapie/risk_control/binary_classification.py | 13 ++++++++----- mapie/risk_control/methods.py | 4 ++-- mapie/risk_control/risks.py | 6 +++--- mapie/utils.py | 4 
++-- 19 files changed, 48 insertions(+), 43 deletions(-) diff --git a/mapie/aggregation_functions.py b/mapie/aggregation_functions.py index ad35f2214..99860f012 100644 --- a/mapie/aggregation_functions.py +++ b/mapie/aggregation_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Optional +from typing import Callable, Optional, cast import numpy as np @@ -113,7 +113,7 @@ def aggregate_all(agg_function: Optional[str], X: NDArray) -> NDArray: """ if agg_function == "median": - return np.nanmedian(X, axis=1) + return cast(NDArray, np.nanmedian(X, axis=1)) elif agg_function == "mean": - return np.nanmean(X, axis=1) + return cast(NDArray, np.nanmean(X, axis=1)) raise ValueError("Aggregation function called but not defined.") diff --git a/mapie/calibration.py b/mapie/calibration.py index 02586486a..6a82f4133 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -548,4 +548,4 @@ def predict( The class from the scores. """ check_is_fitted(self) - return self.single_estimator_.predict(X) + return cast(NDArray, self.single_estimator_.predict(X)) diff --git a/mapie/classification.py b/mapie/classification.py index a3b0964f7..fd48411c3 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -1061,7 +1061,7 @@ def predict( y_pred_proba = check_proba_normalized(y_pred_proba, axis=1) y_pred = self.label_encoder_.inverse_transform(np.argmax(y_pred_proba, axis=1)) if alpha is None: - return y_pred + return cast(NDArray, y_pred) # Estimate of probabilities from estimator(s) # In all cases: len(y_pred_proba.shape) == 3 diff --git a/mapie/conformity_scores/bounds/residuals.py b/mapie/conformity_scores/bounds/residuals.py index fbed94fc6..d5b4692d9 100644 --- a/mapie/conformity_scores/bounds/residuals.py +++ b/mapie/conformity_scores/bounds/residuals.py @@ -225,7 +225,7 @@ def _predict_residual_estimator(self, X: ArrayLike) -> NDArray: + "the residuals and his predict method should return " + "the exponential of the predictions." 
) - return pred + return cast(NDArray, pred) def get_signed_conformity_scores( self, y: ArrayLike, y_pred: ArrayLike, X: Optional[ArrayLike] = None, **kwargs diff --git a/mapie/conformity_scores/sets/aps.py b/mapie/conformity_scores/sets/aps.py index 5304e7a61..1ce944e08 100644 --- a/mapie/conformity_scores/sets/aps.py +++ b/mapie/conformity_scores/sets/aps.py @@ -236,7 +236,7 @@ def _compute_v_parameter( v_param = ( y_proba_last_cumsumed - threshold.reshape(1, -1) ) / y_pred_proba_last[:, 0, :] - return v_param + return cast(NDArray, v_param) def _add_random_tie_breaking( self, @@ -421,4 +421,4 @@ def get_prediction_sets( EPSILON, ) - return prediction_sets + return cast(NDArray, prediction_sets) diff --git a/mapie/conformity_scores/sets/lac.py b/mapie/conformity_scores/sets/lac.py index 3094c81dd..40ca51f01 100644 --- a/mapie/conformity_scores/sets/lac.py +++ b/mapie/conformity_scores/sets/lac.py @@ -217,4 +217,4 @@ def get_prediction_sets( axis=2, ) - return prediction_sets + return cast(NDArray, prediction_sets) diff --git a/mapie/conformity_scores/sets/naive.py b/mapie/conformity_scores/sets/naive.py index 4152d3409..a13e7c5bc 100644 --- a/mapie/conformity_scores/sets/naive.py +++ b/mapie/conformity_scores/sets/naive.py @@ -1,4 +1,4 @@ -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Union, cast import numpy as np from numpy.typing import NDArray @@ -242,4 +242,4 @@ def get_prediction_sets( # get the prediction set by taking all probabilities above the last one prediction_sets = np.greater_equal(y_pred_proba - y_pred_proba_last, -EPSILON) - return prediction_sets + return cast(NDArray, prediction_sets) diff --git a/mapie/conformity_scores/sets/raps.py b/mapie/conformity_scores/sets/raps.py index f8bdd35b9..e1ae3e896 100644 --- a/mapie/conformity_scores/sets/raps.py +++ b/mapie/conformity_scores/sets/raps.py @@ -507,4 +507,4 @@ def _compute_v_parameter( - self.lambda_star * np.maximum(0, L - self.k_star) + self.lambda_star * (L > 
self.k_star) ) - return v_param + return cast(NDArray, v_param) diff --git a/mapie/conformity_scores/sets/topk.py b/mapie/conformity_scores/sets/topk.py index 43d4332cc..e195af2ed 100644 --- a/mapie/conformity_scores/sets/topk.py +++ b/mapie/conformity_scores/sets/topk.py @@ -186,4 +186,4 @@ def get_prediction_sets( y_pred_proba[:, :, np.newaxis] - y_pred_proba_last, -EPSILON ) - return prediction_sets + return cast(NDArray, prediction_sets) diff --git a/mapie/conformity_scores/sets/utils.py b/mapie/conformity_scores/sets/utils.py index 638c6d6a5..c2f2e4357 100644 --- a/mapie/conformity_scores/sets/utils.py +++ b/mapie/conformity_scores/sets/utils.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional, Union, cast import numpy as np from numpy.typing import NDArray @@ -121,4 +121,4 @@ def get_last_index_included( ), axis=1, ) - return y_pred_index_last[:, np.newaxis, :] + return cast(NDArray, y_pred_index_last[:, np.newaxis, :]) diff --git a/mapie/estimator/classifier.py b/mapie/estimator/classifier.py index 81d7e4303..43871c742 100644 --- a/mapie/estimator/classifier.py +++ b/mapie/estimator/classifier.py @@ -237,7 +237,7 @@ def _predict_proba_oof_estimator( y_pred_proba = _fix_number_of_classes( self.n_classes, estimator.classes_, y_pred_proba ) - return y_pred_proba + return cast(NDArray, y_pred_proba) def _predict_proba_calib_oof_estimator( self, diff --git a/mapie/estimator/regressor.py b/mapie/estimator/regressor.py index 3fba10bf2..b249e3761 100644 --- a/mapie/estimator/regressor.py +++ b/mapie/estimator/regressor.py @@ -287,7 +287,7 @@ def _aggregate_with_mask(self, x: NDArray, k: NDArray) -> NDArray: if self.method in self.no_agg_methods_ or self.use_split_method_: raise ValueError("There should not be aggregation of predictions.") elif self.agg_function == "median": - return phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1)) + return cast(NDArray, phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1))) # To aggregate with 
mean() the aggregation coud be done # with phi2D(A=x, B=k, fun=lambda x: np.nanmean(x, axis=1). # However, phi2D contains a np.apply_along_axis loop which @@ -295,7 +295,7 @@ def _aggregate_with_mask(self, x: NDArray, k: NDArray) -> NDArray: # be used to compute the means. elif self.agg_function in ["mean", None]: K = np.nan_to_num(k, nan=0.0) - return np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T) + return cast(NDArray, np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T)) else: raise ValueError("The value of the aggregation function is not correct") @@ -394,7 +394,7 @@ def predict_calib( _check_nan_in_aposteriori_prediction(pred_matrix) y_pred = aggregate_all(self.agg_function, pred_matrix) - return y_pred + return cast(NDArray, y_pred) def fit( self, @@ -574,7 +574,7 @@ def predict( y_pred = self.single_estimator_.predict(X, **predict_params) if not return_multi_pred and not ensemble: - return y_pred + return cast(NDArray, y_pred) if self.method in self.no_agg_methods_ or self.use_split_method_: y_pred_multi_low = y_pred[:, np.newaxis] @@ -602,4 +602,4 @@ def predict( if return_multi_pred: return y_pred, y_pred_multi_low, y_pred_multi_up else: - return y_pred + return cast(NDArray, y_pred) diff --git a/mapie/metrics/calibration.py b/mapie/metrics/calibration.py index d5f2c9bd5..380ed4a9c 100644 --- a/mapie/metrics/calibration.py +++ b/mapie/metrics/calibration.py @@ -207,7 +207,7 @@ def add_jitter( random_state_np = check_random_state(random_state) noise = noise_amplitude * random_state_np.normal(size=n) x_jittered = x * (1 + noise) - return x_jittered + return cast(NDArray, x_jittered) def sort_xy_by_y(x: NDArray, y: NDArray) -> Tuple[NDArray, NDArray]: @@ -311,7 +311,7 @@ def cumulative_differences( ) y_true_sorted, y_score_sorted = sort_xy_by_y(y_true, y_score_jittered) cumulative_differences = np.cumsum(y_true_sorted - y_score_sorted) / n - return cumulative_differences + return cast(NDArray, cumulative_differences) def length_scale(s: NDArray) -> 
float: @@ -403,7 +403,7 @@ def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float: cum_diff = cumulative_differences(y_true, y_score) sigma = length_scale(y_score) ks_stat = np.max(np.abs(cum_diff)) / sigma - return ks_stat + return float(ks_stat) def kolmogorov_smirnov_cdf(x: float) -> float: diff --git a/mapie/metrics/classification.py b/mapie/metrics/classification.py index 9a1f70b39..4bff5b35c 100644 --- a/mapie/metrics/classification.py +++ b/mapie/metrics/classification.py @@ -119,7 +119,7 @@ def classification_coverage_score(y_true: NDArray, y_pred_set: NDArray) -> NDArr y_true = np.expand_dims(y_true, axis=1) y_true = np.expand_dims(y_true, axis=1) coverage = np.nanmean(np.take_along_axis(y_pred_set, y_true, axis=1), axis=0) - return coverage[0] + return cast(NDArray, coverage[0]) def classification_ssc( @@ -243,4 +243,6 @@ def classification_ssc_score( _check_array_nan(y_pred_set) _check_array_inf(y_pred_set) - return np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1) + return cast( + NDArray, np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1) + ) diff --git a/mapie/metrics/regression.py b/mapie/metrics/regression.py index aa4bc8dc4..94800639d 100644 --- a/mapie/metrics/regression.py +++ b/mapie/metrics/regression.py @@ -49,7 +49,7 @@ def regression_mean_width_score(y_intervals: NDArray) -> NDArray: width = np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :]) mean_width = width.mean(axis=0) - return mean_width + return cast(NDArray, mean_width) def regression_coverage_score( @@ -129,7 +129,7 @@ def regression_coverage_score( ), axis=0, ) - return coverages + return cast(NDArray, coverages) def regression_ssc(y_true: NDArray, y_intervals: NDArray, num_bins: int = 3) -> NDArray: @@ -247,7 +247,7 @@ def regression_ssc_score( >>> print(regression_ssc_score(y_true, y_intervals, num_bins=2)) [1. 
0.5] """ - return np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1) + return cast(NDArray, np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1)) def _gaussian_kernel(x: NDArray, kernel_size: int) -> NDArray: @@ -265,7 +265,7 @@ def _gaussian_kernel(x: NDArray, kernel_size: int) -> NDArray: dist = ( -2 * np.matmul(x, x.transpose((0, 2, 1))) + norm_x + norm_x.transpose((0, 2, 1)) ) - return np.exp(-dist / kernel_size) + return cast(NDArray, np.exp(-dist / kernel_size)) def hsic( @@ -358,7 +358,7 @@ def hsic( hsic_mat /= (n_samples - 1) ** 2 coef_hsic = np.sqrt(np.matrix.trace(hsic_mat, axis1=1, axis2=2)) - return coef_hsic + return cast(NDArray, coef_hsic) def coverage_width_based( diff --git a/mapie/risk_control/binary_classification.py b/mapie/risk_control/binary_classification.py index c7e38c809..8e0b7ddb5 100644 --- a/mapie/risk_control/binary_classification.py +++ b/mapie/risk_control/binary_classification.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import Any, Callable, List, Literal, Optional, Tuple, Union +from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast import numpy as np from numpy.typing import ArrayLike, NDArray @@ -304,10 +304,13 @@ def predict(self, X_test: ArrayLike) -> NDArray: "Either you forgot to calibrate the controller first, " "or calibration was not successful." 
) - return self._get_predictions_per_param( - X_test, - np.array([self.best_predict_param]), - )[0] + return cast( + NDArray, + self._get_predictions_per_param( + X_test, + np.array([self.best_predict_param]), + )[0], + ) def _set_best_predict_param_choice( self, diff --git a/mapie/risk_control/methods.py b/mapie/risk_control/methods.py index b3d3a6715..c46425990 100644 --- a/mapie/risk_control/methods.py +++ b/mapie/risk_control/methods.py @@ -173,7 +173,7 @@ def find_best_predict_param( best_predict_param = lambdas[ np.argmin(-np.greater_equal(bound_rep, alphas_np).astype(int), axis=1) ] - return best_predict_param + return cast(NDArray, best_predict_param) def ltt_procedure( @@ -364,7 +364,7 @@ def _h1(r_hats: NDArray, alphas: NDArray) -> NDArray: mask = r_hats != 0 elt1[mask] = r_hats[mask] * np.log(r_hats[mask] / alphas[mask]) elt2 = (1 - r_hats) * np.log((1 - r_hats) / (1 - alphas)) - return elt1 + elt2 + return cast(NDArray, elt1 + elt2) def find_precision_best_predict_param( diff --git a/mapie/risk_control/risks.py b/mapie/risk_control/risks.py index 3cd6d2adc..66edb8975 100644 --- a/mapie/risk_control/risks.py +++ b/mapie/risk_control/risks.py @@ -50,7 +50,7 @@ def compute_risk_recall(lambdas: NDArray, y_pred_proba: NDArray, y: NDArray) -> y_repeat = np.repeat(y[..., np.newaxis], n_lambdas, axis=2) risks = 1 - (_true_positive(y_pred_th, y_repeat) / y.sum(axis=1)[:, np.newaxis]) - return risks + return cast(NDArray, risks) def compute_risk_precision( @@ -101,7 +101,7 @@ def compute_risk_precision( risks = 1 - _true_positive(y_pred_th, y_repeat) / y_pred_th.sum(axis=1) risks[np.isnan(risks)] = 1 # nan value indicate high risks. - return risks + return cast(NDArray, risks) def _true_positive(y_pred_th: NDArray, y_repeat: NDArray) -> NDArray: @@ -122,7 +122,7 @@ def _true_positive(y_pred_th: NDArray, y_repeat: NDArray) -> NDArray: The number of true positive. 
""" tp = (y_pred_th * y_repeat).sum(axis=1) - return tp + return cast(NDArray, tp) class BinaryClassificationRisk: diff --git a/mapie/utils.py b/mapie/utils.py index ec0256391..4e6fe0981 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -876,7 +876,7 @@ def _compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray: for i, alpha_ in enumerate(alpha) ] )[:, 0] - return quantiles_ + return cast(NDArray, quantiles_) def _get_calib_set( @@ -1071,7 +1071,7 @@ def _get_binning_groups( bins = np.sort( np.array([bin_group.max() for bin_group in bin_groups[:-1]] + [np.inf]) ) - return bins + return cast(NDArray, bins) def _calc_bins(