@@ -1,10 +1,12 @@
 import logging
-from typing import List
-
+from typing import List, Optional, Dict
+from smclarify.bias.metrics.common import divide, binary_confusion_matrix
+from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix
+from pandas.api.types import CategoricalDtype
 import pandas as pd
-from .common import divide
+from functional import seq
 
-log = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
 
 
 def confusion_matrix(
@@ -20,15 +22,14 @@ def confusion_matrix(
     :param sensitive_facet_index: boolean column indicating sensitive group
     :param positive_label_index: boolean column indicating positive labels
     :param positive_predicted_label_index: boolean column indicating positive predicted labels
-    :return list of fractions of true positives, false positives, false negatives, true negatives
+    :return: fractions of true positives, false positives, false negatives, true negatives for
+        the sensitive facet only (sensitive_facet=True)
     """
-    TP_d = len(feature[positive_label_index & positive_predicted_label_index & sensitive_facet_index])
-    FN_d = len(feature[positive_label_index & (~positive_predicted_label_index) & sensitive_facet_index])
-
-    TN_d = len(feature[(~positive_label_index) & (~positive_predicted_label_index) & sensitive_facet_index])
-    FP_d = len(feature[(~positive_label_index) & positive_predicted_label_index & sensitive_facet_index])
-    size = len(feature[sensitive_facet_index])
-    return [divide(TP_d, size), divide(FP_d, size), divide(FN_d, size), divide(TN_d, size)]
+    return binary_confusion_matrix(
+        feature[sensitive_facet_index],
+        positive_label_index[sensitive_facet_index],
+        positive_predicted_label_index[sensitive_facet_index],
+    )
 
 
 def proportion(sensitive_facet_index: pd.Series) -> float:
@@ -39,3 +40,190 @@ def proportion(sensitive_facet_index: pd.Series) -> float:
     :return: the fraction of examples in the sensitive facet.
     """
     return sum(sensitive_facet_index) / len(sensitive_facet_index)
+
+
+def observed_label_distribution(
+    feature: pd.DataFrame, sensitive_facet_index: pd.Series, positive_label_index: pd.Series
+) -> List[float]:
+    r"""
+    Distribution of observed label outcomes for sensitive facet
+
+    :param feature: input feature
+    :param sensitive_facet_index: boolean column indicating sensitive group
+    :param positive_label_index: boolean column indicating positive labels
+    :return: list of the proportions of positive and negative label outcomes
+    """
+    pos = len(feature[sensitive_facet_index & positive_label_index])
+    n = len(feature[sensitive_facet_index])
+    proportion_pos = divide(pos, n)
+    return [proportion_pos, 1 - proportion_pos]
+
+
+# Model Performance Metrics
+def accuracy(TP: int, FP: int, TN: int, FN: int) -> float:
+    r"""
+    Proportion of inputs assigned the correct predicted label by the model.
+
+    :param TP: Count of labels which were correctly predicted positive
+    :param FP: Count of labels which were incorrectly predicted positive
+    :param TN: Count of labels which were correctly predicted negative
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Proportion of inputs assigned the correct predicted label by the model.
+    """
+    return divide(TN + TP, TN + FP + FN + TP)
+
+
+def PPL(TP: int, FP: int, TN: int, FN: int) -> float:
+    r"""
+    Proportion of inputs assigned the positive predicted label.
+
+    :param TP: Count of labels which were correctly predicted positive
+    :param FP: Count of labels which were incorrectly predicted positive
+    :param TN: Count of labels which were correctly predicted negative
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Proportion of inputs assigned the positive predicted label.
+    """
+    return divide(TP + FP, TN + FP + FN + TP)
+
+
+def PNL(TP: int, FP: int, TN: int, FN: int) -> float:
+    r"""
+    Proportion of inputs assigned the negative predicted label.
+
+    :param TP: Count of labels which were correctly predicted positive
+    :param FP: Count of labels which were incorrectly predicted positive
+    :param TN: Count of labels which were correctly predicted negative
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Proportion of inputs assigned the negative predicted label.
+    """
+    return divide(TN + FN, TN + FP + FN + TP)
+
+
+def recall(TP: int, FN: int) -> float:
+    r"""
+    Proportion of inputs with positive observed label correctly assigned the positive predicted label.
+
+    :param TP: Count of labels which were correctly predicted positive
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Proportion of inputs with positive observed label correctly assigned the positive predicted label.
+    """
+    return divide(TP, TP + FN)
+
+
+def specificity(TN: int, FP: int) -> float:
+    r"""
+    Proportion of inputs with negative observed label correctly assigned the negative predicted label.
+
+    :param FP: Count of labels which were incorrectly predicted positive
+    :param TN: Count of labels which were correctly predicted negative
+    :return: Proportion of inputs with negative observed label correctly assigned the negative predicted label.
+    """
+    return divide(TN, TN + FP)
+
+
+def precision(TP: int, FP: int) -> float:
+    r"""
+    Proportion of inputs with positive predicted label that actually have a positive observed label.
+
+    :param TP: Count of labels which were correctly predicted positive
+    :param FP: Count of labels which were incorrectly predicted positive
+    :return: Proportion of inputs with positive predicted label that actually have a positive observed label.
+    """
+    return divide(TP, TP + FP)
+
+
+def rejection_rate(TN: int, FN: int) -> float:
+    r"""
+    Proportion of inputs with negative predicted label that actually have a negative observed label.
+
+    :param TN: Count of labels which were correctly predicted negative
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Proportion of inputs with negative predicted label that actually have a negative observed label.
+    """
+    return divide(TN, TN + FN)
+
+
+def conditional_acceptance(TP: int, FP: int, FN: int) -> float:
+    r"""
+    Ratio between the positive observed labels and positive predicted labels.
+
+    :param TP: Count of labels which were correctly predicted positive
+    :param FP: Count of labels which were incorrectly predicted positive
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Ratio between the positive observed labels and positive predicted labels.
+    """
+    return divide(TP + FN, TP + FP)
+
+
+def conditional_rejection(FP: int, TN: int, FN: int) -> float:
+    r"""
+    Ratio between the negative observed labels and negative predicted labels.
+
+    :param FP: Count of labels which were incorrectly predicted positive
+    :param TN: Count of labels which were correctly predicted negative
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Ratio between the negative observed labels and negative predicted labels.
+    """
+    return divide(TN + FP, TN + FN)
+
+
+def f1_score(TP: int, FP: int, FN: int) -> float:
+    r"""
+    Harmonic mean of precision and recall.
+
+    :param TP: Count of labels which were correctly predicted positive
+    :param FP: Count of labels which were incorrectly predicted positive
+    :param FN: Count of labels which were incorrectly predicted negative
+    :return: Harmonic mean of precision and recall.
+    """
+    precision_score = precision(TP, FP)
+    recall_score = recall(TP, FN)
+    return 2 * divide(precision_score * recall_score, precision_score + recall_score)
+
+
+# Model Performance Metrics
+def multicategory_confusion_matrix(
+    label_series: pd.Series, predicted_label_series: pd.Series
+) -> Optional[Dict[str, Dict]]:
+    """
+    Confusion Matrix for categorical label cases.
+    :param label_series: Label Data Series
+    :param predicted_label_series: Predicted Label Data Series
+    :return: Matrix JSON where rows refer to true labels, and columns refer to predicted labels
+    """
+    # Handle differing pd.Series dtypes
+    unique_label_values = list(label_series.unique())
+    unique_label_values.sort()
+    if label_series.dtype.name != predicted_label_series.dtype.name:
+        try:
+            predicted_label_series = predicted_label_series.astype(label_series.dtype)
+        except Exception as e:
+            logger.warning(
+                f"Predicted Label Series type {predicted_label_series.dtype.name} could not be cast as Label Series type {label_series.dtype.name}. "
+                f"Multicategory Confusion Matrix won't be computed due to: {e}"
+            )
+            return None
+    # Handle CategoricalDtype difference (see test/integration/test_bias_metrics)
+    if label_series.dtype == "category":
+        try:
+            pred_label_category = predicted_label_series.dtype.categories.astype(label_series.dtype.categories.dtype)
+            category_obj = CategoricalDtype(pred_label_category, label_series.dtype.ordered)
+            predicted_label_series = predicted_label_series.astype(category_obj)
+        except Exception as e:
+            logger.warning(
+                f"Predicted Label Series could not be cast as Label Series type. "
+                f"Multicategory Confusion Matrix won't be computed due to: {e}"
+            )
+            return None
+    confusion_matrix_array = sklearn_confusion_matrix(label_series, predicted_label_series, labels=unique_label_values)
+    assert confusion_matrix_array.shape == (
+        len(unique_label_values),
+        len(unique_label_values),
+    )
+    matrix_json = {}
+    unique_label_strings = [str(val) for val in unique_label_values]
+    for index, val in enumerate(unique_label_strings):
+        confusion_matrix_floats = [float(cfn_val) for cfn_val in confusion_matrix_array[index]]
+        matrix_json[val] = seq(unique_label_strings).zip(confusion_matrix_floats).dict()
+
+    return matrix_json
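
As a quick illustration of how the scalar performance metrics added above behave, here is a minimal sketch using hypothetical confusion-matrix counts (the numbers are made up purely for illustration):

    # Hypothetical counts for one facet: 100 examples in total
    TP, FP, TN, FN = 40, 10, 40, 10

    accuracy(TP, FP, TN, FN)   # (TP + TN) / total = 0.8
    recall(TP, FN)             # TP / (TP + FN) = 0.8
    precision(TP, FP)          # TP / (TP + FP) = 0.8
    f1_score(TP, FP, FN)       # harmonic mean of precision and recall = 0.8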
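
And a rough sketch of the nested-dict shape returned by the new multicategory_confusion_matrix, again with hypothetical label Series (rows are observed labels, columns are predicted labels):

    import pandas as pd

    y_true = pd.Series(["bird", "cat", "dog", "dog"])
    y_pred = pd.Series(["bird", "cat", "dog", "bird"])
    multicategory_confusion_matrix(y_true, y_pred)
    # {'bird': {'bird': 1.0, 'cat': 0.0, 'dog': 0.0},
    #  'cat': {'bird': 0.0, 'cat': 1.0, 'dog': 0.0},
    #  'dog': {'bird': 1.0, 'cat': 0.0, 'dog': 1.0}}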