scikit-learn-contrib · dimoibiehg · Oct 17, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -54,6 +54,7 @@ Contributors
 * Faustin Pulvéric <[email protected]>
 * Chaoqi Zhang <[email protected]>
 * Leena Kamran Qidwai
+* Omid Gheibi <[email protected]>
 * Aman Vishnoi <[email protected]>
 * Hannes Körner <HannesMK>
 To be continued ...
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -4,6 +4,8 @@ History
 
 1.x.x (2025-xx-xx)
 ------------------
+* Introduce VennAbersCalibrator for both binary and multiclass classification
+
 * Remove dependency of internal classes on sklearn's check_is_fitted
 * Add an example of risk control with LLM as a judge
 * Add comparison with naive threshold in risk control quick start example

diff --git a/examples/calibration/1-quickstart/plot_calibration_venn_abers_binary.py b/examples/calibration/1-quickstart/plot_calibration_venn_abers_binary.py
@@ -0,0 +1,98 @@
+"""
+=================================================
+Calibrating binary classifier with Venn-ABERS
+=================================================
+This example shows how to calibrate a binary classifier with
+:class:`~mapie.calibration.VennAbersCalibrator` and visualize the
+impact on predicted probabilities.
+
+We compare an uncalibrated model to its Venn-ABERS calibrated version
+using reliability diagrams and Brier scores.
+"""
+
+from __future__ import annotations
+
+import matplotlib.pyplot as plt
+from sklearn.calibration import CalibrationDisplay
+from sklearn.datasets import make_classification
+from sklearn.metrics import brier_score_loss
+from sklearn.model_selection import train_test_split
+
+from mapie.calibration import VennAbersCalibrator
+
+####################################################################
+# 1. Build a miscalibrated binary classifier
+# ---------------------------------------------------
+# We generate a toy binary dataset and fit a random forest model,
+# which is known to be miscalibrated out of the box (averaging over
+# trees makes it tend to avoid probabilities close to 0 or 1). We use
+# a fairly large dataset so that the calibration set has enough samples.
+
+from sklearn.ensemble import RandomForestClassifier
+
+X, y = make_classification(
+    n_samples=5000,
+    n_features=20,
+    n_informative=10,
+    n_redundant=2,
+    class_sep=0.8,
+    random_state=42,
+)
+
+# Split into train, calibration, and test sets
+X_temp, X_test, y_temp, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=42, stratify=y
+)
+
+X_train, X_calib, y_train, y_calib = train_test_split(
+    X_temp, y_temp, test_size=0.3, random_state=42, stratify=y_temp
+)
+
+# Use Random Forest which tends to be miscalibrated
+base_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
+base_model.fit(X_train, y_train)
+probs_raw = base_model.predict_proba(X_test)[:, 1]
+raw_brier = brier_score_loss(y_test, probs_raw)
+
+####################################################################
+# 2. Calibrate with Venn-ABERS
+# ----------------------------
+# We wrap the same base model in :class:`~mapie.calibration.VennAbersCalibrator`
+# using the inductive mode (default). The calibrator uses the calibration set
+# to learn a calibration mapping that will improve probability estimates.
+
+va_calibrator = VennAbersCalibrator(
+    estimator=RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42),
+    inductive=True,
+    random_state=42,
+)
+va_calibrator.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib)
+probs_va = va_calibrator.predict_proba(X_test)[:, 1]
+va_brier = brier_score_loss(y_test, probs_va)
+
+####################################################################
+# 3. Reliability diagrams and Brier scores
+# ----------------------------------------
+# Reliability diagrams show how predicted probabilities compare to
+# observed frequencies. Perfect calibration lies on the diagonal.
+# We also display Brier scores to quantify the improvement.
+
+fig, axes = plt.subplots(1, 2, figsize=(12, 5))
+CalibrationDisplay.from_predictions(
+    y_test,
+    probs_raw,
+    name=f"Uncalibrated (Brier={raw_brier:.3f})",
+    n_bins=10,
+    ax=axes[0],
+)
+CalibrationDisplay.from_predictions(
+    y_test,
+    probs_va,
+    name=f"Venn-ABERS (Brier={va_brier:.3f})",
+    n_bins=10,
+    ax=axes[1],
+)
+axes[0].set_title("Before calibration")
+axes[1].set_title("After Venn-ABERS calibration")
+plt.tight_layout()
+plt.show()
diff --git a/examples/calibration/1-quickstart/plot_calibration_venn_abers_multiclass.py b/examples/calibration/1-quickstart/plot_calibration_venn_abers_multiclass.py
@@ -0,0 +1,125 @@
+"""
+====================================================
+Calibrating multi-class classifier with Venn-ABERS
+====================================================
+This example shows how to calibrate a multi-class classifier with
+:class:`~mapie.calibration.VennAbersCalibrator` and visualize the
+impact on predicted probabilities. We compare an uncalibrated model
+against its Venn-ABERS calibrated version using reliability diagrams
+and multi-class Brier scores.
+"""
+
+from __future__ import annotations
+
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.calibration import calibration_curve
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import label_binarize
+
+from mapie.calibration import VennAbersCalibrator
+
+####################################################################
+# 1. Build a miscalibrated multi-class classifier
+# -----------------------------------------------
+# We generate a 3-class dataset and fit a random forest model,
+# which is known to be miscalibrated out of the box.
+
+from sklearn.ensemble import RandomForestClassifier
+
+X, y = make_classification(
+    n_samples=5000,
+    n_features=20,
+    n_informative=12,
+    n_redundant=2,
+    n_classes=3,
+    n_clusters_per_class=1,
+    class_sep=0.8,
+    random_state=7,
+)
+
+classes = np.unique(y)
+# Split into train, calibration, and test sets
+X_temp, X_test, y_temp, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=7, stratify=y
+)
+
+X_train, X_calib, y_train, y_calib = train_test_split(
+    X_temp, y_temp, test_size=0.3, random_state=7, stratify=y_temp
+)
+
+base_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=7)
+base_model.fit(X_train, y_train)
+probs_raw = base_model.predict_proba(X_test)
+
+####################################################################
+# 2. Calibrate with Venn-ABERS
+# ----------------------------
+# The calibrator refits the base model internally and learns a mapping
+# from the held-out calibration set. Venn-ABERS natively supports
+# multi-class problems.
+
+va_calibrator = VennAbersCalibrator(
+    estimator=RandomForestClassifier(n_estimators=100, max_depth=10, random_state=7),
+    inductive=True,
+    random_state=7,
+)
+va_calibrator.fit(X_train, y_train, X_calib=X_calib, y_calib=y_calib)
+probs_va = va_calibrator.predict_proba(X_test)
+
+####################################################################
+# 3. Multi-class Brier score helper
+# ---------------------------------
+# We sum the squared errors between predicted probabilities and one-hot
+# encoded labels over the classes, then average over the samples.
+
+
+def multiclass_brier(y_true: np.ndarray, proba: np.ndarray) -> float:
+    y_onehot = label_binarize(y_true, classes=classes)
+    return float(np.mean(np.sum((y_onehot - proba) ** 2, axis=1)))
+
+
+brier_raw = multiclass_brier(y_test, probs_raw)
+brier_va = multiclass_brier(y_test, probs_va)
+
+####################################################################
+# 4. Reliability diagrams and Brier scores
+# ----------------------------------------
+# We plot one-vs-rest reliability curves for each class before and after
+# calibration. A lower Brier score indicates better calibration.
+
+fig, axes = plt.subplots(1, 2, figsize=(12, 5))
+for cls in classes:
+    y_true_cls = (y_test == cls).astype(int)
+    prob_raw_cls = probs_raw[:, cls]
+    prob_va_cls = probs_va[:, cls]
+
+    frac_pos_raw, mean_pred_raw = calibration_curve(
+        y_true_cls, prob_raw_cls, n_bins=10, strategy="uniform"
+    )
+    frac_pos_va, mean_pred_va = calibration_curve(
+        y_true_cls, prob_va_cls, n_bins=10, strategy="uniform"
+    )
+
+    axes[0].plot(mean_pred_raw, frac_pos_raw, marker="o", label=f"class {cls}")
+    axes[1].plot(mean_pred_va, frac_pos_va, marker="o", label=f"class {cls}")
+
+for ax, title in zip(
+    axes,
+    [
+        f"Before calibration (Brier={brier_raw:.3f})",
+        f"After Venn-ABERS (Brier={brier_va:.3f})",
+    ],
+):
+    ax.plot([0, 1], [0, 1], "k--", linewidth=1)
+    ax.set_xlim(0, 1)
+    ax.set_ylim(0, 1)
+    ax.set_xlabel("Mean predicted probability")
+    ax.set_ylabel("Fraction of positives")
+    ax.set_title(title)
+    ax.grid(True)
+    ax.legend()
+
+plt.tight_layout()
+plt.show()