scikit-learn-contrib · VishnoiAman777 · Dec 11, 2025 · Dec 11, 2025 · Dec 11, 2025
diff --git a/mapie/aggregation_functions.py b/mapie/aggregation_functions.py
@@ -1,4 +1,4 @@
-from typing import Callable, Optional
+from typing import Callable, Optional, cast
 
 import numpy as np
 
@@ -113,7 +113,7 @@ def aggregate_all(agg_function: Optional[str], X: NDArray) -> NDArray:
 
     """
     if agg_function == "median":
-        return np.nanmedian(X, axis=1)
+        return cast(NDArray, np.nanmedian(X, axis=1))
     elif agg_function == "mean":
-        return np.nanmean(X, axis=1)
+        return cast(NDArray, np.nanmean(X, axis=1))
     raise ValueError("Aggregation function called but not defined.")
diff --git a/mapie/calibration.py b/mapie/calibration.py
@@ -548,4 +548,4 @@ def predict(
             The class from the scores.
         """
         check_is_fitted(self)
-        return self.single_estimator_.predict(X)
+        return cast(NDArray, self.single_estimator_.predict(X))
diff --git a/mapie/classification.py b/mapie/classification.py
@@ -1061,7 +1061,7 @@ def predict(
         y_pred_proba = check_proba_normalized(y_pred_proba, axis=1)
         y_pred = self.label_encoder_.inverse_transform(np.argmax(y_pred_proba, axis=1))
         if alpha is None:
-            return y_pred
+            return cast(NDArray, y_pred)
 
         # Estimate of probabilities from estimator(s)
         # In all cases: len(y_pred_proba.shape) == 3

diff --git a/mapie/conformity_scores/bounds/residuals.py b/mapie/conformity_scores/bounds/residuals.py
@@ -166,7 +166,7 @@ def _fit_residual_estimator(
         X: NDArray,
         y: NDArray,
         y_pred: NDArray,
-    ) -> Tuple[NDArray, NDArray]:
+    ) -> RegressorMixin:
         """
         Fit the residual estimator and returns the indexes used for the
         training of the base estimator and those needed for the conformalization.
@@ -225,7 +225,7 @@ def _predict_residual_estimator(self, X: ArrayLike) -> NDArray:
                 + "the residuals and his predict method should return "
                 + "the exponential of the predictions."
             )
-        return pred
+        return cast(NDArray, pred)
 
     def get_signed_conformity_scores(
         self, y: ArrayLike, y_pred: ArrayLike, X: Optional[ArrayLike] = None, **kwargs

diff --git a/mapie/conformity_scores/sets/aps.py b/mapie/conformity_scores/sets/aps.py
@@ -236,7 +236,7 @@ def _compute_v_parameter(
         v_param = (
             y_proba_last_cumsumed - threshold.reshape(1, -1)
         ) / y_pred_proba_last[:, 0, :]
-        return v_param
+        return cast(NDArray, v_param)
 
     def _add_random_tie_breaking(
         self,
@@ -421,4 +421,4 @@ def get_prediction_sets(
                 EPSILON,
             )
 
-        return prediction_sets
+        return cast(NDArray, prediction_sets)
diff --git a/mapie/conformity_scores/sets/lac.py b/mapie/conformity_scores/sets/lac.py
@@ -217,4 +217,4 @@ def get_prediction_sets(
                 axis=2,
             )
 
-        return prediction_sets
+        return cast(NDArray, prediction_sets)
diff --git a/mapie/conformity_scores/sets/naive.py b/mapie/conformity_scores/sets/naive.py
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple, Union, cast
 
 import numpy as np
 from numpy.typing import NDArray
@@ -242,4 +242,4 @@ def get_prediction_sets(
         # get the prediction set by taking all probabilities above the last one
         prediction_sets = np.greater_equal(y_pred_proba - y_pred_proba_last, -EPSILON)
 
-        return prediction_sets
+        return cast(NDArray, prediction_sets)
diff --git a/mapie/conformity_scores/sets/raps.py b/mapie/conformity_scores/sets/raps.py
@@ -507,4 +507,4 @@ def _compute_v_parameter(
             - self.lambda_star * np.maximum(0, L - self.k_star)
             + self.lambda_star * (L > self.k_star)
         )
-        return v_param
+        return cast(NDArray, v_param)
diff --git a/mapie/conformity_scores/sets/topk.py b/mapie/conformity_scores/sets/topk.py
@@ -186,4 +186,4 @@ def get_prediction_sets(
             y_pred_proba[:, :, np.newaxis] - y_pred_proba_last, -EPSILON
         )
 
-        return prediction_sets
+        return cast(NDArray, prediction_sets)
diff --git a/mapie/conformity_scores/sets/utils.py b/mapie/conformity_scores/sets/utils.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Optional, Union, cast
 import numpy as np
 
 from numpy.typing import NDArray
@@ -121,4 +121,4 @@ def get_last_index_included(
             ),
             axis=1,
         )
-    return y_pred_index_last[:, np.newaxis, :]
+    return cast(NDArray, y_pred_index_last[:, np.newaxis, :])
diff --git a/mapie/estimator/classifier.py b/mapie/estimator/classifier.py
@@ -237,7 +237,7 @@ def _predict_proba_oof_estimator(
             y_pred_proba = _fix_number_of_classes(
                 self.n_classes, estimator.classes_, y_pred_proba
             )
-        return y_pred_proba
+        return cast(NDArray, y_pred_proba)
 
     def _predict_proba_calib_oof_estimator(
         self,

diff --git a/mapie/estimator/regressor.py b/mapie/estimator/regressor.py
@@ -287,15 +287,15 @@ def _aggregate_with_mask(self, x: NDArray, k: NDArray) -> NDArray:
         if self.method in self.no_agg_methods_ or self.use_split_method_:
             raise ValueError("There should not be aggregation of predictions.")
         elif self.agg_function == "median":
-            return phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1))
+            return cast(NDArray, phi2D(A=x, B=k, fun=lambda x: np.nanmedian(x, axis=1)))
         # To aggregate with mean() the aggregation coud be done
         # with phi2D(A=x, B=k, fun=lambda x: np.nanmean(x, axis=1).
         # However, phi2D contains a np.apply_along_axis loop which
         # is much slower than the matrices multiplication that can
         # be used to compute the means.
         elif self.agg_function in ["mean", None]:
             K = np.nan_to_num(k, nan=0.0)
-            return np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T)
+            return cast(NDArray, np.matmul(x, (K / (K.sum(axis=1, keepdims=True))).T))
         else:
             raise ValueError("The value of the aggregation function is not correct")
 
@@ -394,7 +394,7 @@ def predict_calib(
                     _check_nan_in_aposteriori_prediction(pred_matrix)
                     y_pred = aggregate_all(self.agg_function, pred_matrix)
 
-        return y_pred
+        return cast(NDArray, y_pred)
 
     def fit(
         self,
@@ -574,7 +574,7 @@ def predict(
 
         y_pred = self.single_estimator_.predict(X, **predict_params)
         if not return_multi_pred and not ensemble:
-            return y_pred
+            return cast(NDArray, y_pred)
 
         if self.method in self.no_agg_methods_ or self.use_split_method_:
             y_pred_multi_low = y_pred[:, np.newaxis]
@@ -602,4 +602,4 @@ def predict(
         if return_multi_pred:
             return y_pred, y_pred_multi_low, y_pred_multi_up
         else:
-            return y_pred
+            return cast(NDArray, y_pred)
diff --git a/mapie/metrics/calibration.py b/mapie/metrics/calibration.py
@@ -73,8 +73,8 @@ def expected_calibration_error(
         y_true_, y_score, num_bins, split_strategy
     )
 
-    return np.divide(
-        np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes)
+    return float(
+        np.divide(np.sum(bin_sizes * np.abs(bin_accs - bin_confs)), np.sum(bin_sizes))
     )
 
 
@@ -207,7 +207,7 @@ def add_jitter(
     random_state_np = check_random_state(random_state)
     noise = noise_amplitude * random_state_np.normal(size=n)
     x_jittered = x * (1 + noise)
-    return x_jittered
+    return cast(NDArray, x_jittered)
 
 
 def sort_xy_by_y(x: NDArray, y: NDArray) -> Tuple[NDArray, NDArray]:
@@ -311,7 +311,7 @@ def cumulative_differences(
     )
     y_true_sorted, y_score_sorted = sort_xy_by_y(y_true, y_score_jittered)
     cumulative_differences = np.cumsum(y_true_sorted - y_score_sorted) / n
-    return cumulative_differences
+    return cast(NDArray, cumulative_differences)
 
 
 def length_scale(s: NDArray) -> float:
@@ -348,7 +348,7 @@ def length_scale(s: NDArray) -> float:
     """
     n = len(s)
     length_scale = np.sqrt(np.sum(s * (1 - s))) / n
-    return length_scale
+    return float(length_scale)
 
 
 def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float:
@@ -403,7 +403,7 @@ def kolmogorov_smirnov_statistic(y_true: NDArray, y_score: NDArray) -> float:
     cum_diff = cumulative_differences(y_true, y_score)
     sigma = length_scale(y_score)
     ks_stat = np.max(np.abs(cum_diff)) / sigma
-    return ks_stat
+    return float(ks_stat)
 
 
 def kolmogorov_smirnov_cdf(x: float) -> float:
@@ -561,7 +561,7 @@ def kuiper_statistic(y_true: NDArray, y_score: NDArray) -> float:
     cum_diff = cumulative_differences(y_true, y_score)
     sigma = length_scale(y_score)
     ku_stat = (np.max(cum_diff) - np.min(cum_diff)) / sigma  # type: ignore
-    return ku_stat
+    return float(ku_stat)
 
 
 def kuiper_cdf(x: float) -> float:
@@ -727,7 +727,7 @@ def spiegelhalter_statistic(y_true: NDArray, y_score: NDArray) -> float:
     numerator: float = np.sum((y_true - y_score) * (1 - 2 * y_score))
     denominator = np.sqrt(np.sum((1 - 2 * y_score) ** 2 * y_score * (1 - y_score)))
     sp_stat = numerator / denominator
-    return sp_stat
+    return float(sp_stat)
 
 
 def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float:
@@ -775,4 +775,4 @@ def spiegelhalter_p_value(y_true: NDArray, y_score: NDArray) -> float:
     _check_array_inf(y_score)
     sp_stat = spiegelhalter_statistic(y_true, y_score)
     sp_p_value = 1 - scipy.stats.norm.cdf(sp_stat)
-    return sp_p_value
+    return float(sp_p_value)
diff --git a/mapie/metrics/classification.py b/mapie/metrics/classification.py
@@ -14,7 +14,7 @@
 )
 
 
-def classification_mean_width_score(y_pred_set: ArrayLike) -> float:
+def classification_mean_width_score(y_pred_set: ArrayLike) -> NDArray:
     """
     Mean width of prediction set output by
     :class:`~mapie.classification._MapieClassifier`.
@@ -48,7 +48,7 @@ def classification_mean_width_score(y_pred_set: ArrayLike) -> float:
     _check_array_inf(y_pred_set)
     width = y_pred_set.sum(axis=1)
     mean_width = width.mean(axis=0)
-    return mean_width
+    return cast(NDArray, mean_width)
 
 
 def classification_coverage_score(y_true: NDArray, y_pred_set: NDArray) -> NDArray:
@@ -119,7 +119,7 @@ def classification_coverage_score(y_true: NDArray, y_pred_set: NDArray) -> NDArr
         y_true = np.expand_dims(y_true, axis=1)
     y_true = np.expand_dims(y_true, axis=1)
     coverage = np.nanmean(np.take_along_axis(y_pred_set, y_true, axis=1), axis=0)
-    return coverage[0]
+    return cast(NDArray, coverage[0])
 
 
 def classification_ssc(
@@ -243,4 +243,6 @@ def classification_ssc_score(
     _check_array_nan(y_pred_set)
     _check_array_inf(y_pred_set)
 
-    return np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1)
+    return cast(
+        NDArray, np.nanmin(classification_ssc(y_true, y_pred_set, num_bins), axis=1)
+    )
diff --git a/mapie/metrics/regression.py b/mapie/metrics/regression.py
@@ -49,7 +49,7 @@ def regression_mean_width_score(y_intervals: NDArray) -> NDArray:
 
     width = np.abs(y_intervals[:, 1, :] - y_intervals[:, 0, :])
     mean_width = width.mean(axis=0)
-    return mean_width
+    return cast(NDArray, mean_width)
 
 
 def regression_coverage_score(
@@ -129,7 +129,7 @@ def regression_coverage_score(
         ),
         axis=0,
     )
-    return coverages
+    return cast(NDArray, coverages)
 
 
 def regression_ssc(y_true: NDArray, y_intervals: NDArray, num_bins: int = 3) -> NDArray:
@@ -247,7 +247,7 @@ def regression_ssc_score(
     >>> print(regression_ssc_score(y_true, y_intervals, num_bins=2))
     [1.  0.5]
     """
-    return np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1)
+    return cast(NDArray, np.min(regression_ssc(y_true, y_intervals, num_bins), axis=1))
 
 
 def _gaussian_kernel(x: NDArray, kernel_size: int) -> NDArray:
@@ -265,7 +265,7 @@ def _gaussian_kernel(x: NDArray, kernel_size: int) -> NDArray:
     dist = (
         -2 * np.matmul(x, x.transpose((0, 2, 1))) + norm_x + norm_x.transpose((0, 2, 1))
     )
-    return np.exp(-dist / kernel_size)
+    return cast(NDArray, np.exp(-dist / kernel_size))
 
 
 def hsic(
@@ -358,7 +358,7 @@ def hsic(
     hsic_mat /= (n_samples - 1) ** 2
     coef_hsic = np.sqrt(np.matrix.trace(hsic_mat, axis1=1, axis2=2))
 
-    return coef_hsic
+    return cast(NDArray, coef_hsic)
 
 
 def coverage_width_based(
@@ -544,5 +544,5 @@ def regression_mwi_score(
     error_above: float = np.sum((y_true - y_pred_up)[y_true > y_pred_up])
     error_below: float = np.sum((y_pred_low - y_true)[y_true < y_pred_low])
     total_error = error_above + error_below
-    mwi = (width + total_error * 2 / (1 - confidence_level)) / len(y_true)
+    mwi = float((width + total_error * 2 / (1 - confidence_level)) / len(y_true))
     return mwi
diff --git a/mapie/risk_control/binary_classification.py b/mapie/risk_control/binary_classification.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import warnings
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
@@ -304,10 +304,13 @@ def predict(self, X_test: ArrayLike) -> NDArray:
                 "Either you forgot to calibrate the controller first, "
                 "or calibration was not successful."
             )
-        return self._get_predictions_per_param(
-            X_test,
-            np.array([self.best_predict_param]),
-        )[0]
+        return cast(
+            NDArray,
+            self._get_predictions_per_param(
+                X_test,
+                np.array([self.best_predict_param]),
+            )[0],
+        )
 
     def _set_best_predict_param_choice(
         self,

diff --git a/mapie/risk_control/methods.py b/mapie/risk_control/methods.py
@@ -173,7 +173,7 @@ def find_best_predict_param(
     best_predict_param = lambdas[
         np.argmin(-np.greater_equal(bound_rep, alphas_np).astype(int), axis=1)
     ]
-    return best_predict_param
+    return cast(NDArray, best_predict_param)
 
 
 def ltt_procedure(
@@ -364,7 +364,7 @@ def _h1(r_hats: NDArray, alphas: NDArray) -> NDArray:
     mask = r_hats != 0
     elt1[mask] = r_hats[mask] * np.log(r_hats[mask] / alphas[mask])
     elt2 = (1 - r_hats) * np.log((1 - r_hats) / (1 - alphas))
-    return elt1 + elt2
+    return cast(NDArray, elt1 + elt2)
 
 
 def find_precision_best_predict_param(

diff --git a/mapie/risk_control/risks.py b/mapie/risk_control/risks.py
@@ -50,7 +50,7 @@ def compute_risk_recall(lambdas: NDArray, y_pred_proba: NDArray, y: NDArray) ->
 
     y_repeat = np.repeat(y[..., np.newaxis], n_lambdas, axis=2)
     risks = 1 - (_true_positive(y_pred_th, y_repeat) / y.sum(axis=1)[:, np.newaxis])
-    return risks
+    return cast(NDArray, risks)
 
 
 def compute_risk_precision(
@@ -101,7 +101,7 @@ def compute_risk_precision(
         risks = 1 - _true_positive(y_pred_th, y_repeat) / y_pred_th.sum(axis=1)
     risks[np.isnan(risks)] = 1  # nan value indicate high risks.
 
-    return risks
+    return cast(NDArray, risks)
 
 
 def _true_positive(y_pred_th: NDArray, y_repeat: NDArray) -> NDArray:
@@ -122,7 +122,7 @@ def _true_positive(y_pred_th: NDArray, y_repeat: NDArray) -> NDArray:
         The number of true positive.
     """
     tp = (y_pred_th * y_repeat).sum(axis=1)
-    return tp
+    return cast(NDArray, tp)
 
 
 class BinaryClassificationRisk:
@@ -217,4 +217,4 @@ def get_prediction_sets(
                     axis=2,
                 )
-            return prediction_sets
+            return cast(NDArray, prediction_sets)