
Commit 1efb539

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 8701666 commit 1efb539

3 files changed: +281 -85 lines changed

README.md (+2 -2)
@@ -113,7 +113,7 @@ distributed_random_forest.evaluate(local_test.x, local_test.y, num_classes, glob
 
 ## Evaluation Metrics
 
-To ease the evaluation of large-scale datasets, we implement multi-class evaluation metrics operating directly on the confusion matrix (instead of the true vs predicted values for all samples).
+To ease the evaluation of large-scale datasets, we implement multi-class evaluation metrics operating directly on the confusion matrix (instead of the true vs predicted values for all samples).
 
 We support the following metrics, with the interfaces based on the corresponding `sklearn.metrics` functions:
 - **Accuracy:** the global accuracy
@@ -132,7 +132,7 @@ We support the following metrics, with the interfaces based on the corresponding
 ```python3
 import numpy as np
 from specialcouscous import evaluation_metrics
-
+
 path_to_confusion_matrix_csv = "example.csv"
 confusion_matrix = np.loadtxt(path_to_confusion_matrix_csv)
 
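For context, the README snippet above can be continued with any of the confusion-matrix-based metrics touched by this commit. A minimal sketch (not part of the commit; function names as they appear in `specialcouscous/evaluation_metrics.py` below, with rows of the confusion matrix as true labels and columns as predicted labels):

```python
import numpy as np

from specialcouscous import evaluation_metrics

path_to_confusion_matrix_csv = "example.csv"
confusion_matrix = np.loadtxt(path_to_confusion_matrix_csv)

# Class-wise and averaged scores, computed directly from the confusion matrix.
f1_per_class = evaluation_metrics.f1_score(confusion_matrix, average=None)
f1_macro = evaluation_metrics.f1_score(confusion_matrix, average="macro")
kappa = evaluation_metrics.cohen_kappa_score(confusion_matrix)
mcc = evaluation_metrics.matthews_corrcoef(confusion_matrix)
```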

specialcouscous/evaluation_metrics.py (+66 -22)
@@ -44,7 +44,9 @@ def balanced_accuracy_score(confusion_matrix: np.ndarray) -> float:
 
 def precision_recall_fscore(
     confusion_matrix: np.ndarray, beta: float = 1.0, average: str | None = None
-) -> tuple[float | np.ndarray[float], float | np.ndarray[float], float | np.ndarray[float]]:
+) -> tuple[
+    float | np.ndarray[float], float | np.ndarray[float], float | np.ndarray[float]
+]:
     """
     Compute the precision, recall, and f-beta score for the given confusion matrix of a multi-class classification
     model. The three metrics are either returned as class-wise values (if average == None) or averaged using one of the
@@ -92,31 +94,43 @@ def precision_recall_fscore(
 
     supported_averages = ["micro", "macro", "weighted", None]
     if average not in supported_averages:
-        raise ValueError(f"Invalid {average=}. Supported averages are: {supported_averages}.")
+        raise ValueError(
+            f"Invalid {average=}. Supported averages are: {supported_averages}."
+        )
 
     if average == "micro":  # compute metrics globally
         accuracy = n_correct / n_samples
-        return accuracy, accuracy, accuracy  # precision, recall, f_score are all the same
+        return (
+            accuracy,
+            accuracy,
+            accuracy,
+        )  # precision, recall, f_score are all the same
 
     predicted_samples_per_class = confusion_matrix.sum(axis=0)
     true_samples_per_class = confusion_matrix.sum(axis=1)
     correct_predictions_per_class = confusion_matrix.diagonal()  # true positives
-    false_positives_per_class = predicted_samples_per_class - correct_predictions_per_class
+    false_positives_per_class = (
+        predicted_samples_per_class - correct_predictions_per_class
+    )
     false_negatives_per_class = true_samples_per_class - correct_predictions_per_class
 
     precision_per_class = correct_predictions_per_class / predicted_samples_per_class
     recall_per_class = correct_predictions_per_class / true_samples_per_class
     # using the f-score definition (1+β²) TP / ((1+β²) TP + β² FN + FP)
     nominator = (1 + beta**2) * correct_predictions_per_class  # (1+β²) TP
     denominator = (  # ((1+β²) TP + β² FN + FP)
-        (1 + beta**2) * correct_predictions_per_class + beta**2 * false_negatives_per_class + false_positives_per_class
+        (1 + beta**2) * correct_predictions_per_class
+        + beta**2 * false_negatives_per_class
+        + false_positives_per_class
     )
     f_score_per_class = nominator / denominator
 
     if average is None:  # return raw metrics per class without aggregation
         return precision_per_class, recall_per_class, f_score_per_class
 
-    if average == "weighted":  # average metrics, class weighted by number of true samples with that label
+    if (
+        average == "weighted"
+    ):  # average metrics, class weighted by number of true samples with that label
         class_weights = true_samples_per_class
     elif average == "macro":  # average metrics, all classes have the same weight
         class_weights = np.ones_like(true_samples_per_class)
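To make the per-class bookkeeping above easy to verify by hand, here is a small standalone sketch (plain NumPy plus `sklearn` for cross-checking, not part of this commit) that evaluates the same (1+β²)·TP / ((1+β²)·TP + β²·FN + FP) definition on a 2×2 confusion matrix:

```python
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# Rows are true labels, columns are predicted labels.
cm = np.array([[4, 1], [2, 3]])
beta = 1.0

tp = cm.diagonal()        # true positives per class
fp = cm.sum(axis=0) - tp  # false positives per class
fn = cm.sum(axis=1) - tp  # false negatives per class
f_beta = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * fn + fp)
print(f_beta)  # [0.7272..., 0.6666...]

# The same values from label vectors via sklearn.
y_true = [0] * 5 + [1] * 5
y_pred = [0, 0, 0, 0, 1, 0, 0, 1, 1, 1]
print(precision_recall_fscore_support(y_true, y_pred, beta=beta))
```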
@@ -132,7 +146,9 @@ def average_with_weights(weights, values):
     return precision, recall, f_score
 
 
-def precision_score(confusion_matrix: np.ndarray, average: str | None = None) -> float | np.ndarray[float]:
+def precision_score(
+    confusion_matrix: np.ndarray, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the precision score for the given confusion matrix of a multi-class classification model. The result is
     either returned as class-wise values (if average == None) or averaged.
@@ -158,7 +174,9 @@ def precision_score(confusion_matrix: np.ndarray, average: str | None = None) ->
     return precision
 
 
-def recall_score(confusion_matrix: np.ndarray, average: str | None = None) -> float | np.ndarray[float]:
+def recall_score(
+    confusion_matrix: np.ndarray, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the recall score for the given confusion matrix of a multi-class classification model. The result is either
     returned as class-wise values (if average == None) or averaged.
@@ -211,13 +229,17 @@ def _f_score_from_precision_and_recall(
 
     if isinstance(denominator, np.ndarray):
         fscore = (1 + beta**2) * nominator / denominator
-        fscore[np.logical_and(denominator == 0, np.isnan(fscore))] = 0  # replace nan from division by zero with zeros
+        fscore[np.logical_and(denominator == 0, np.isnan(fscore))] = (
+            0  # replace nan from division by zero with zeros
+        )
         return fscore
     else:  # scalar case, avoid division by zero for scalar values
         return 0 if (denominator == 0) else (1 + beta**2) * nominator / denominator
 
 
-def fbeta_score(confusion_matrix: np.ndarray, beta: float, average: str | None = None) -> float | np.ndarray[float]:
+def fbeta_score(
+    confusion_matrix: np.ndarray, beta: float, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the F-beta score for the given confusion matrix of a multi-class classification model. The result is either
     returned as class-wise values (if average == None) or averaged.
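The nan replacement in `_f_score_from_precision_and_recall` above relies on a boolean-mask assignment. A standalone NumPy sketch of that pattern (assuming the usual F_β = (1+β²)·P·R / (β²·P + R) form; this does not call the module's private helper):

```python
import numpy as np

precision = np.array([0.5, 0.0])  # second class has no correct and no relevant predictions
recall = np.array([0.5, 0.0])
beta = 1.0

denominator = beta**2 * precision + recall
with np.errstate(divide="ignore", invalid="ignore"):
    fscore = (1 + beta**2) * precision * recall / denominator  # 0/0 -> nan for class 1
# Replace nan from division by zero with zeros, mirroring the line changed above.
fscore[np.logical_and(denominator == 0, np.isnan(fscore))] = 0
print(fscore)  # [0.5, 0.0]
```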
@@ -241,11 +263,15 @@ def fbeta_score(confusion_matrix: np.ndarray, beta: float, average: str | None =
         The f-beta score either class-wise (if average == None) or averaged over all classes using the specified
         averaging method.
     """
-    _, _, f_score = precision_recall_fscore(confusion_matrix, beta=beta, average=average)
+    _, _, f_score = precision_recall_fscore(
+        confusion_matrix, beta=beta, average=average
+    )
     return f_score
 
 
-def f1_score(confusion_matrix: np.ndarray, average: str | None = None) -> float | np.ndarray[float]:
+def f1_score(
+    confusion_matrix: np.ndarray, average: str | None = None
+) -> float | np.ndarray[float]:
     """
     Compute the F1 score for the given confusion matrix of a multi-class classification model. The result is either
     returned as class-wise values (if average == None) or averaged.
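As a quick sanity check of the `average` options handled in `precision_recall_fscore`, the confusion-matrix-based scores can be compared against `sklearn` computed from label vectors (a sketch, not part of this commit):

```python
import numpy as np
import sklearn.metrics

from specialcouscous import evaluation_metrics

y_true = np.array([0, 0, 0, 1, 1, 2])
y_pred = np.array([0, 1, 0, 1, 1, 2])
cm = sklearn.metrics.confusion_matrix(y_true, y_pred)  # rows: true, columns: predicted

for average in (None, "micro", "macro", "weighted"):
    ours = evaluation_metrics.f1_score(cm, average=average)
    reference = sklearn.metrics.f1_score(y_true, y_pred, average=average)
    print(average, ours, reference)
```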
@@ -291,12 +317,20 @@ def cohen_kappa_score(confusion_matrix: np.ndarray) -> float:
 
     predicted_samples_per_class = np.sum(confusion_matrix, axis=0)
     true_samples_per_class = np.sum(confusion_matrix, axis=1)
-    expected_confusion_matrix = np.outer(predicted_samples_per_class, true_samples_per_class) / n_samples
+    expected_confusion_matrix = (
+        np.outer(predicted_samples_per_class, true_samples_per_class) / n_samples
+    )
 
-    expected_accuracy = expected_confusion_matrix.diagonal().sum() / n_samples  # = expected agreement p_e
-    observed_accuracy = confusion_matrix.diagonal().sum() / n_samples  # = observed agreement p_o
+    expected_accuracy = (
+        expected_confusion_matrix.diagonal().sum() / n_samples
+    )  # = expected agreement p_e
+    observed_accuracy = (
+        confusion_matrix.diagonal().sum() / n_samples
+    )  # = observed agreement p_o
 
-    return (observed_accuracy - expected_accuracy) / (1 - expected_accuracy)  # = Cohen's kappa (p_o - p_e) / (1 - p_e)
+    return (observed_accuracy - expected_accuracy) / (
+        1 - expected_accuracy
+    )  # = Cohen's kappa (p_o - p_e) / (1 - p_e)
 
 
 def matthews_corrcoef(confusion_matrix: np.ndarray) -> float:
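For the kappa computation above, a small worked check (a sketch, not part of this commit; `sklearn`'s reference implementation works from label vectors rather than from the confusion matrix):

```python
import numpy as np
import sklearn.metrics

from specialcouscous import evaluation_metrics

y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 1, 2, 0])
cm = sklearn.metrics.confusion_matrix(y_true, y_pred)

# p_o = 4/6, p_e = (2*2 + 2*3 + 2*1) / 36 = 1/3, kappa = (p_o - p_e) / (1 - p_e) = 0.5
print(evaluation_metrics.cohen_kappa_score(cm))
print(sklearn.metrics.cohen_kappa_score(y_true, y_pred))  # reference: 0.5
```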
@@ -321,9 +355,19 @@ def matthews_corrcoef(confusion_matrix: np.ndarray) -> float:
     n_correct = confusion_matrix.trace()  # = c
 
     # MCC = (c * s - t • p) / (sqrt(s^2 - p • p) * sqrt(s^2 - t • t))
-    nominator_tp = n_correct * n_samples - np.dot(true_samples_per_class, predicted_samples_per_class)  # c * s - t•p
-    denominator_predicted = n_samples**2 - np.dot(predicted_samples_per_class, predicted_samples_per_class)  # s^2 - p•p
-    denominator_true = n_samples**2 - np.dot(true_samples_per_class, true_samples_per_class)  # s^2 - t•t
-    denominator = np.sqrt(denominator_predicted * denominator_true)  # sqrt(s^2 - p • p) * sqrt(s^2 - t • t)
-
-    return 0 if denominator == 0 else nominator_tp / denominator  # MCC = (c*s - t•p) / sqrt((s^2 - p•p) * (s^2 - t•t))
+    nominator_tp = n_correct * n_samples - np.dot(
+        true_samples_per_class, predicted_samples_per_class
+    )  # c * s - t•p
+    denominator_predicted = n_samples**2 - np.dot(
+        predicted_samples_per_class, predicted_samples_per_class
+    )  # s^2 - p•p
+    denominator_true = n_samples**2 - np.dot(
+        true_samples_per_class, true_samples_per_class
+    )  # s^2 - t•t
+    denominator = np.sqrt(
+        denominator_predicted * denominator_true
+    )  # sqrt(s^2 - p • p) * sqrt(s^2 - t • t)
+
+    return (
+        0 if denominator == 0 else nominator_tp / denominator
+    )  # MCC = (c*s - t•p) / sqrt((s^2 - p•p) * (s^2 - t•t))
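And the matching cross-check for the MCC expression above (again a sketch, not part of this commit):

```python
import numpy as np
import sklearn.metrics

from specialcouscous import evaluation_metrics

y_true = np.array([0, 0, 1, 1, 2, 2])
y_pred = np.array([0, 1, 1, 1, 2, 0])
cm = sklearn.metrics.confusion_matrix(y_true, y_pred)

# c = 4, s = 6, t = [2, 2, 2], p = [2, 3, 1]
# MCC = (4*6 - 12) / sqrt((36 - 14) * (36 - 12)) = 12 / sqrt(528) ≈ 0.522
print(evaluation_metrics.matthews_corrcoef(cm))
print(sklearn.metrics.matthews_corrcoef(y_true, y_pred))  # reference value
```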
