@@ -44,7 +44,9 @@ def balanced_accuracy_score(confusion_matrix: np.ndarray) -> float:
 
 def precision_recall_fscore(
     confusion_matrix: np.ndarray, beta: float = 1.0, average: str | None = None
-) -> tuple[float | np.ndarray[float], float | np.ndarray[float], float | np.ndarray[float]]:
+) -> tuple[
+    float | np.ndarray[float], float | np.ndarray[float], float | np.ndarray[float]
+]:
     """
     Compute the precision, recall, and f-beta score for the given confusion matrix of a multi-class classification
     model. The three metrics are either returned as class-wise values (if average == None) or averaged using one of the
@@ -92,31 +94,43 @@ def precision_recall_fscore(
 
     supported_averages = ["micro", "macro", "weighted", None]
     if average not in supported_averages:
-        raise ValueError(f"Invalid {average = }. Supported averages are: {supported_averages}.")
+        raise ValueError(
+            f"Invalid {average = }. Supported averages are: {supported_averages}."
+        )
 
     if average == "micro":  # compute metrics globally
         accuracy = n_correct / n_samples
-        return accuracy, accuracy, accuracy  # precision, recall, f_score are all the same
+        return (
+            accuracy,
+            accuracy,
+            accuracy,
+        )  # precision, recall, f_score are all the same
 
     predicted_samples_per_class = confusion_matrix.sum(axis=0)
     true_samples_per_class = confusion_matrix.sum(axis=1)
     correct_predictions_per_class = confusion_matrix.diagonal()  # true positives
-    false_positives_per_class = predicted_samples_per_class - correct_predictions_per_class
+    false_positives_per_class = (
+        predicted_samples_per_class - correct_predictions_per_class
+    )
     false_negatives_per_class = true_samples_per_class - correct_predictions_per_class
 
     precision_per_class = correct_predictions_per_class / predicted_samples_per_class
     recall_per_class = correct_predictions_per_class / true_samples_per_class
     # using the f-score definition (1+β²) TP / ((1+β²) TP + β² FN + FP)
     nominator = (1 + beta**2) * correct_predictions_per_class  # (1+β²) TP
     denominator = (  # ((1+β²) TP + β² FN + FP)
-        (1 + beta**2) * correct_predictions_per_class + beta**2 * false_negatives_per_class + false_positives_per_class
+        (1 + beta**2) * correct_predictions_per_class
+        + beta**2 * false_negatives_per_class
+        + false_positives_per_class
     )
     f_score_per_class = nominator / denominator
 
     if average is None:  # return raw metrics per class without aggregation
         return precision_per_class, recall_per_class, f_score_per_class
 
-    if average == "weighted":  # average metrics, class weighted by number of true samples with that label
+    if (
+        average == "weighted"
+    ):  # average metrics, class weighted by number of true samples with that label
         class_weights = true_samples_per_class
     elif average == "macro":  # average metrics, all classes have the same weight
         class_weights = np.ones_like(true_samples_per_class)
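For reference (not part of this change), a minimal sketch of the per-class arithmetic these reformatted lines implement, using a hypothetical 2x2 confusion matrix; as above, axis=0 sums give predicted samples per class and axis=1 sums give true samples per class:

import numpy as np

cm = np.array([[8, 2], [1, 9]])  # hypothetical: rows = true labels, columns = predictions
tp = cm.diagonal()               # true positives per class -> [8, 9]
predicted = cm.sum(axis=0)       # predicted samples per class -> [9, 11]
true = cm.sum(axis=1)            # true samples per class -> [10, 10]
beta = 1.0
precision = tp / predicted       # ~[0.889, 0.818]
recall = tp / true               # [0.8, 0.9]
# (1+β²) TP / ((1+β²) TP + β² FN + FP)
f_beta = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * (true - tp) + (predicted - tp))
print(precision, recall, f_beta)  # the per-class (average=None) case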
@@ -132,7 +146,9 @@ def average_with_weights(weights, values):
     return precision, recall, f_score
 
 
-def precision_score(confusion_matrix: np.ndarray, average: str | None = None) -> float | np.ndarray[float]:
+def precision_score(
+    confusion_matrix: np.ndarray, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the precision score for the given confusion matrix of a multi-class classification model. The result is
     either returned as class-wise values (if average == None) or averaged.
@@ -158,7 +174,9 @@ def precision_score(confusion_matrix: np.ndarray, average: str | None = None) ->
     return precision
 
 
-def recall_score(confusion_matrix: np.ndarray, average: str | None = None) -> float | np.ndarray[float]:
+def recall_score(
+    confusion_matrix: np.ndarray, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the recall score for the given confusion matrix of a multi-class classification model. The result is either
     returned as class-wise values (if average == None) or averaged.
@@ -211,13 +229,17 @@ def _f_score_from_precision_and_recall(
 
     if isinstance(denominator, np.ndarray):
         fscore = (1 + beta**2) * nominator / denominator
-        fscore[np.logical_and(denominator == 0, np.isnan(fscore))] = 0  # replace nan from division by zero with zeros
+        fscore[np.logical_and(denominator == 0, np.isnan(fscore))] = (
+            0  # replace nan from division by zero with zeros
+        )
         return fscore
     else:  # scalar case, avoid division by zero for scalar values
         return 0 if (denominator == 0) else (1 + beta**2) * nominator / denominator
 
 
-def fbeta_score(confusion_matrix: np.ndarray, beta: float, average: str | None = None) -> float | np.ndarray[float]:
+def fbeta_score(
+    confusion_matrix: np.ndarray, beta: float, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the F-beta score for the given confusion matrix of a multi-class classification model. The result is either
     returned as class-wise values (if average == None) or averaged.
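The zero-division guard in the hunk above protects the standard F-beta relation between precision and recall. A minimal standalone sketch of that relation (hypothetical helper name, not this module's API):

import numpy as np

def fbeta_from_pr(precision, recall, beta=1.0):
    # F-beta = (1 + β²)·P·R / (β²·P + R), returning 0 where the denominator is 0
    precision = np.asarray(precision, dtype=float)
    recall = np.asarray(recall, dtype=float)
    denominator = beta**2 * precision + recall
    with np.errstate(divide="ignore", invalid="ignore"):
        fscore = (1 + beta**2) * precision * recall / denominator
    return np.where(denominator == 0, 0.0, fscore)

print(fbeta_from_pr([0.5, 0.0], [0.5, 0.0]))  # [0.5 0. ]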
@@ -241,11 +263,15 @@ def fbeta_score(confusion_matrix: np.ndarray, beta: float, average: str | None =
         The f-beta score either class-wise (if average == None) or averaged over all classes using the specified
         averaging method.
     """
-    _, _, f_score = precision_recall_fscore(confusion_matrix, beta=beta, average=average)
+    _, _, f_score = precision_recall_fscore(
+        confusion_matrix, beta=beta, average=average
+    )
     return f_score
 
 
-def f1_score(confusion_matrix: np.ndarray, average: str | None = None) -> float | np.ndarray[float]:
+def f1_score(
+    confusion_matrix: np.ndarray, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the F1 score for the given confusion matrix of a multi-class classification model. The result is either
     returned as class-wise values (if average == None) or averaged.
@@ -291,12 +317,20 @@ def cohen_kappa_score(confusion_matrix: np.ndarray) -> float:
 
     predicted_samples_per_class = np.sum(confusion_matrix, axis=0)
     true_samples_per_class = np.sum(confusion_matrix, axis=1)
-    expected_confusion_matrix = np.outer(predicted_samples_per_class, true_samples_per_class) / n_samples
+    expected_confusion_matrix = (
+        np.outer(predicted_samples_per_class, true_samples_per_class) / n_samples
+    )
 
-    expected_accuracy = expected_confusion_matrix.diagonal().sum() / n_samples  # = expected agreement p_e
-    observed_accuracy = confusion_matrix.diagonal().sum() / n_samples  # = observed agreement p_o
+    expected_accuracy = (
+        expected_confusion_matrix.diagonal().sum() / n_samples
+    )  # = expected agreement p_e
+    observed_accuracy = (
+        confusion_matrix.diagonal().sum() / n_samples
+    )  # = observed agreement p_o
 
-    return (observed_accuracy - expected_accuracy) / (1 - expected_accuracy)  # = Cohen's kappa (p_o - p_e) / (1 - p_e)
+    return (observed_accuracy - expected_accuracy) / (
+        1 - expected_accuracy
+    )  # = Cohen's kappa (p_o - p_e) / (1 - p_e)
 
 
 def matthews_corrcoef(confusion_matrix: np.ndarray) -> float:
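As a quick sanity check of the (p_o - p_e) / (1 - p_e) definition used above, a hedged example on an illustrative 2x2 confusion matrix (values chosen only for the arithmetic):

import numpy as np

cm = np.array([[20, 5], [10, 15]])                   # hypothetical confusion matrix
n = cm.sum()                                         # 50 samples
p_o = cm.diagonal().sum() / n                        # observed agreement = 0.7
p_e = np.dot(cm.sum(axis=0), cm.sum(axis=1)) / n**2  # expected agreement = 0.5
print((p_o - p_e) / (1 - p_e))                       # Cohen's kappa = 0.4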
@@ -321,9 +355,19 @@ def matthews_corrcoef(confusion_matrix: np.ndarray) -> float:
     n_correct = confusion_matrix.trace()  # = c
 
     # MCC = (c * s - t • p) / (sqrt(s^2 - p • p) * sqrt(s^2 - t • t))
-    nominator_tp = n_correct * n_samples - np.dot(true_samples_per_class, predicted_samples_per_class)  # c * s - t•p
-    denominator_predicted = n_samples**2 - np.dot(predicted_samples_per_class, predicted_samples_per_class)  # s^2 - p•p
-    denominator_true = n_samples**2 - np.dot(true_samples_per_class, true_samples_per_class)  # s^2 - t•t
-    denominator = np.sqrt(denominator_predicted * denominator_true)  # sqrt(s^2 - p • p) * sqrt(s^2 - t • t)
-
-    return 0 if denominator == 0 else nominator_tp / denominator  # MCC = (c*s - t•p) / sqrt((s^2 - p•p) * (s^2 - t•t))
+    nominator_tp = n_correct * n_samples - np.dot(
+        true_samples_per_class, predicted_samples_per_class
+    )  # c * s - t•p
+    denominator_predicted = n_samples**2 - np.dot(
+        predicted_samples_per_class, predicted_samples_per_class
+    )  # s^2 - p•p
+    denominator_true = n_samples**2 - np.dot(
+        true_samples_per_class, true_samples_per_class
+    )  # s^2 - t•t
+    denominator = np.sqrt(
+        denominator_predicted * denominator_true
+    )  # sqrt(s^2 - p • p) * sqrt(s^2 - t • t)
+
+    return (
+        0 if denominator == 0 else nominator_tp / denominator
+    )  # MCC = (c*s - t•p) / sqrt((s^2 - p•p) * (s^2 - t•t))
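Likewise, the multi-class MCC expression spelled out in the comments, (c·s − t·p) / sqrt((s² − p·p)(s² − t·t)), can be checked on an illustrative matrix; a minimal sketch, independent of this module:

import numpy as np

cm = np.array([[20, 5], [10, 15]])  # hypothetical confusion matrix
s = cm.sum()                        # total samples
c = cm.trace()                      # correctly classified samples
p = cm.sum(axis=0)                  # predicted samples per class
t = cm.sum(axis=1)                  # true samples per class
numerator = c * s - np.dot(t, p)
denominator = np.sqrt((s**2 - np.dot(p, p)) * (s**2 - np.dot(t, t)))
print(0 if denominator == 0 else numerator / denominator)  # ≈ 0.408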