
Commit 1edaf23

xiaoyi-cheng and Pranav Krishnan authored

add basic stat metric, and model performance report (#136)

Co-authored-by: Pranav Krishnan <[email protected]>

1 parent 9a47250, commit 1edaf23

File tree: 6 files changed, +705 −21 lines changed
Lines changed: 200 additions & 12 deletions
@@ -1,10 +1,12 @@
 import logging
-from typing import List
-
+from typing import List, Optional, Dict
+from smclarify.bias.metrics.common import divide, binary_confusion_matrix
+from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix
+from pandas.api.types import CategoricalDtype
 import pandas as pd
-from .common import divide
+from functional import seq

-log = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)


 def confusion_matrix(
@@ -20,15 +22,14 @@ def confusion_matrix(
     :param sensitive_facet_index: boolean column indicating sensitive group
     :param positive_label_index: boolean column indicating positive labels
     :param positive_predicted_label_index: boolean column indicating positive predicted labels
-    :return list of fractions of true positives, false positives, false negatives, true negatives
+    :return fractions of true positives, false positives, false negatives, true negatives for
+        the sensitive facet only (sensitive_facet = True)
     """
-    TP_d = len(feature[positive_label_index & positive_predicted_label_index & sensitive_facet_index])
-    FN_d = len(feature[positive_label_index & (~positive_predicted_label_index) & sensitive_facet_index])
-
-    TN_d = len(feature[(~positive_label_index) & (~positive_predicted_label_index) & sensitive_facet_index])
-    FP_d = len(feature[(~positive_label_index) & positive_predicted_label_index & sensitive_facet_index])
-    size = len(feature[sensitive_facet_index])
-    return [divide(TP_d, size), divide(FP_d, size), divide(FN_d, size), divide(TN_d, size)]
+    return binary_confusion_matrix(
+        feature[sensitive_facet_index],
+        positive_label_index[sensitive_facet_index],
+        positive_predicted_label_index[sensitive_facet_index],
+    )


 def proportion(sensitive_facet_index: pd.Series) -> float:
@@ -39,3 +40,190 @@ def proportion(sensitive_facet_index: pd.Series) -> float:
     :return: the fraction of examples in the sensitive facet.
     """
     return sum(sensitive_facet_index) / len(sensitive_facet_index)
+
+
+def observed_label_distribution(
+    feature: pd.DataFrame, sensitive_facet_index: pd.Series, positive_label_index: pd.Series
+) -> List[float]:
+    r"""
+    Distribution of observed label outcomes for sensitive facet
+
+    :param feature: input feature
+    :param sensitive_facet_index: boolean column indicating sensitive group
+    :param positive_label_index: boolean column indicating positive labels
+    :return: List of Proportion of positive and negative label outcomes
+    """
+    pos = len(feature[sensitive_facet_index & positive_label_index])
+    n = len(feature[sensitive_facet_index])
+    proportion_pos = divide(pos, n)
+    return [proportion_pos, 1 - proportion_pos]
+
+
+# Model Performance Metrics
+def accuracy(TP: int, FP: int, TN: int, FN: int) -> float:
+    r"""
+    Proportion of inputs assigned the correct predicted label by the model.
+
+    :param: TP Counts of labels which were correctly predicted positive
+    :param: FP Counts of labels which were incorrectly predicted positive
+    :param: TN Counts of labels which were correctly predicted negative
+    :param: FN Counts of labels which were incorrectly predicted negative
+    :return: Proportion of inputs assigned the correct predicted label by the model.
+    """
+    return divide(TN + TP, TN + FP + FN + TP)
+
+
+def PPL(TP: int, FP: int, TN: int, FN: int) -> float:
+    r"""
+    Proportion of input assigned in positive predicted label.
+
+    :param: TP: Counts of labels which were correctly predicted positive
+    :param: FP: Counts of labels which were incorrectly predicted positive
+    :param: TN: Counts of labels which were correctly predicted negative
+    :param: FN: Counts of labels which were incorrectly predicted negative
+    :return: Proportion of inputs assigned the positive predicted label.
+    """
+    return divide(TP + FP, TN + FP + FN + TP)
+
+
+def PNL(TP: int, FP: int, TN: int, FN: int) -> float:
+    r"""
+    Proportion of input assigned the negative predicted label.
+
+    :param: TP: Counts of labels which were correctly predicted positive
+    :param: FP: Counts of labels which were incorrectly predicted positive
+    :param: TN: Counts of labels which were correctly predicted negative
+    :param: FN: Counts of labels which were incorrectly predicted negative
+    :return: Proportion of inputs assigned the negative predicted label.
+    """
+    return divide(TN + FN, TN + FP + FN + TP)
+
+
+def recall(TP: int, FN: int) -> float:
+    r"""
+    Proportion of inputs with positive observed label correctly assigned the positive predicted label.
+
+    :param: TP Counts of labels which were correctly predicted positive
+    :param: FN Counts of labels which were incorrectly predicted negative
+    :return: Proportion of inputs with positive observed label correctly assigned the positive predicted label.
+    """
+    return divide(TP, TP + FN)
+
+
+def specificity(TN: int, FP: int) -> float:
+    r"""
+    Proportion of inputs with negative observed label correctly assigned the negative predicted label.
+
+    :param: FP Counts of labels which were incorrectly predicted positive
+    :param: TN Counts of labels which were correctly predicted negative
+    :return: Proportion of inputs with negative observed label correctly assigned the negative predicted label.
+    """
+    return divide(TN, TN + FP)
+
+
+def precision(TP: int, FP: int) -> float:
+    r"""
+    Proportion of inputs with positive predicted label that actually have a positive observed label.
+
+    :param: TP Counts of labels which were correctly predicted positive
+    :param: FP Counts of labels which were incorrectly predicted positive
+    :return: Proportion of inputs with positive predicted label that actually have a positive observed label.
+    """
+    return divide(TP, TP + FP)
+
+
+def rejection_rate(TN: int, FN: int) -> float:
+    r"""
+    Proportion of inputs with negative predicted label that actually have a negative observed label.
+
+    :param: TN Counts of labels which were correctly predicted negative
+    :param: FN Counts of labels which were incorrectly predicted negative
+    :return: Proportion of inputs with negative predicted label that actually have a negative observed label.
+    """
+    return divide(TN, TN + FN)
+
+
+def conditional_acceptance(TP: int, FP: int, FN: int) -> float:
+    r"""
+    Ratio between the positive observed labels and positive predicted labels.
+
+    :param: TP Counts of labels which were correctly predicted positive
+    :param: FP Counts of labels which were incorrectly predicted positive
+    :param: FN Counts of labels which were incorrectly predicted negative
+    :return: Ratio between the positive observed labels and positive predicted labels.
+    """
+    return divide(TP + FN, TP + FP)
+
+
+def conditional_rejection(FP: int, TN: int, FN: int) -> float:
+    r"""
+    Ratio between the negative observed labels and negative predicted labels.
+
+    :param: FP Counts of labels which were incorrectly predicted positive
+    :param: TN Counts of labels which were correctly predicted negative
+    :param: FN Counts of labels which were incorrectly predicted negative
+    :return: Ratio between the negative observed labels and negative predicted labels.
+    """
+    return divide(TN + FP, TN + FN)
+
+
+def f1_score(TP: int, FP: int, FN: int) -> float:
+    r"""
+    Harmonic mean of precision and recall.
+
+    :param: TP Counts of labels which were correctly predicted positive
+    :param: FP Counts of labels which were incorrectly predicted positive
+    :param: FN Counts of labels which were incorrectly predicted negative
+    :return: Harmonic mean of precision and recall.
+    """
+    precision_score = precision(TP, FP)
+    recall_score = recall(TP, FN)
+    return 2 * divide(precision_score * recall_score, precision_score + recall_score)
+
+
+# Model Performance Metrics
+def multicategory_confusion_matrix(
+    label_series: pd.Series, predicted_label_series: pd.Series
+) -> Optional[Dict[str, Dict]]:
+    """
+    Confusion Matrix for categorical label cases.
+    :param label_series: Label Data Series
+    :param predicted_label_series: Predicted Label Data Series
+    :return: Matrix JSON where rows refer to true labels, and columns refer to predicted labels
+    """
+    # Handle differing pd.Series dtypes
+    unique_label_values = list(label_series.unique())
+    unique_label_values.sort()
+    if label_series.dtype.name != predicted_label_series.dtype.name:
+        try:
+            predicted_label_series = predicted_label_series.astype(label_series.dtype)
+        except Exception as e:
+            logger.warning(
+                f"Predicted Label Series type {predicted_label_series.dtype.name} could not be cast as Label Series type {label_series.dtype.name}. "
+                f"Multicategory Confusion Matrix won't be computed due to: {e}"
+            )
+            return None
+    # Handle CategoricalDtype difference (see test/integration/test_bias_metrics)
+    if label_series.dtype == "category":
+        try:
+            pred_label_category = predicted_label_series.dtype.categories.astype(label_series.dtype.categories.dtype)
+            category_obj = CategoricalDtype(pred_label_category, label_series.dtype.ordered)
+            predicted_label_series = predicted_label_series.astype(category_obj)
+        except Exception as e:
+            logger.warning(
+                f"Predicted Label Series could not be cast as Label Series type. "
+                f"Multicategory Confusion Matrix won't be computed due to: {e}"
+            )
+            return None
+    confusion_matrix_array = sklearn_confusion_matrix(label_series, predicted_label_series, labels=unique_label_values)
+    assert confusion_matrix_array.shape == (
+        len(unique_label_values),
+        len(unique_label_values),
+    )
+    matrix_json = {}
+    unique_label_strings = [str(val) for val in unique_label_values]
+    for index, val in enumerate(unique_label_strings):
+        confusion_matrix_floats = [float(cfn_val) for cfn_val in confusion_matrix_array[index]]
+        matrix_json[val] = seq(unique_label_strings).zip(confusion_matrix_floats).dict()
+
+    return matrix_json
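For orientation (not part of the commit), here is a minimal hand-worked sketch of how the count-based performance helpers added above combine. No import is shown because this page does not display the first file's path; the counts and results below are checked by hand against the formulas in the diff.

# Hand-worked example mirroring accuracy, precision, recall and f1_score above.
# TP/FP/TN/FN are confusion-matrix counts; n = 100 in this example.
TP, FP, TN, FN = 40, 10, 45, 5

accuracy_val = (TP + TN) / (TP + FP + TN + FN)    # 85 / 100 = 0.85
precision_val = TP / (TP + FP)                    # 40 / 50  = 0.80
recall_val = TP / (TP + FN)                       # 40 / 45  ≈ 0.889
f1_val = 2 * precision_val * recall_val / (precision_val + recall_val)  # ≈ 0.842
ppl_val = (TP + FP) / (TP + FP + TN + FN)         # 50 / 100 = 0.50, cf. PPL above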

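Similarly, a small sketch (again not part of the commit) of the shape multicategory_confusion_matrix returns. The input Series are made up here, and the expected output is derived by hand from the sklearn confusion matrix the function wraps, with rows as true labels and columns as predicted labels.

import pandas as pd

# Hypothetical inputs for multicategory_confusion_matrix(labels, preds).
labels = pd.Series(["cat", "dog", "cat", "bird"])
preds = pd.Series(["cat", "cat", "cat", "bird"])

# Expected result (labels sorted as ["bird", "cat", "dog"]):
# {
#     "bird": {"bird": 1.0, "cat": 0.0, "dog": 0.0},
#     "cat":  {"bird": 0.0, "cat": 2.0, "dog": 0.0},
#     "dog":  {"bird": 0.0, "cat": 1.0, "dog": 0.0},
# }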
src/smclarify/bias/metrics/common.py

Lines changed: 20 additions & 0 deletions
@@ -49,6 +49,26 @@ def metric_description(metric: Callable[..., float]) -> str:
     return metric.__doc__.lstrip().split("\n")[0]  # type: ignore


+def binary_confusion_matrix(
+    feature: pd.Series, positive_label_index: pd.Series, positive_predicted_label_index: pd.Series
+) -> List[int]:
+    assert len(feature) == len(positive_label_index) == len(positive_predicted_label_index)
+    TP, TN, FP, FN = calc_confusion_matrix_quadrants(feature, positive_label_index, positive_predicted_label_index)
+    n = len(feature)
+    return [divide(TP, n), divide(FP, n), divide(FN, n), divide(TN, n)]
+
+
+def calc_confusion_matrix_quadrants(
+    feature: pd.Series, positive_label_index: pd.Series, positive_predicted_label_index: pd.Series
+) -> Tuple[int, int, int, int]:
+    TP = len(feature[positive_label_index & positive_predicted_label_index])
+    TN = len(feature[~positive_label_index & (~positive_predicted_label_index)])
+
+    FP = len(feature[(~positive_label_index) & positive_predicted_label_index])
+    FN = len(feature[(positive_label_index) & (~positive_predicted_label_index)])
+    return TP, TN, FP, FN
+
+
 def DPL(feature: pd.Series, sensitive_facet_index: pd.Series, positive_label_index: pd.Series) -> float:
     require(sensitive_facet_index.dtype == bool, "sensitive_facet_index must be of type bool")
     require(positive_label_index.dtype == bool, "label_index must be of type bool")