-
Notifications
You must be signed in to change notification settings - Fork 833
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit implements the F-beta score metric (#1543)
for the AnswerCorrectness class. The beta parameter is introduced to control the relative importance of recall and precision when calculating the score. Specifically: - beta > 1 places more emphasis on recall. - beta < 1 favors precision. - beta == 1 yields the regular F1 score, which can be interpreted as the harmonic mean of precision and recall. Key Changes: The method _compute_statement_presence is updated to calculate the F-beta score based on true positives (TP), false positives (FP), and false negatives (FN). This ensures that we can balance recall and precision, depending on the task's requirements, by tuning the beta value. source: https://scikit-learn.org/1.5/modules/generated/sklearn.metrics.fbeta_score.html --------- Co-authored-by: Shahules786 <[email protected]>
- Loading branch information
1 parent
fd5e805
commit 6d114e5
Showing
5 changed files
with
63 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,22 @@ | ||
from ragas.dataset_schema import EvaluationDataset | ||
from ragas.metrics import ALL_METRICS | ||
from ragas.metrics.base import Metric | ||
from ragas.validation import validate_required_columns | ||
def fbeta_score(tp, fp, fn, beta=1.0):
    """Compute the F-beta score from raw confusion counts.

    ``beta > 1`` weights recall more heavily, ``beta < 1`` favors
    precision, and ``beta == 1`` yields the standard F1 score (the
    harmonic mean of precision and recall).

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        fn: Number of false negatives.
        beta: Relative weight of recall versus precision.

    Returns:
        The F-beta score as a float in [0, 1]; 0.0 when both precision
        and recall are zero (i.e. there are no true positives).
    """
    # Guard the divisions: an empty predicted (tp + fp == 0) or actual
    # (tp + fn == 0) positive set yields 0 rather than ZeroDivisionError.
    precision = 0 if tp + fp == 0 else tp / (tp + fp)
    recall = 0 if tp + fn == 0 else tp / (tp + fn)

    # Both zero implies tp == 0; the F-beta denominator below would be
    # zero in exactly this case, so short-circuit to 0.0.
    if precision == 0 and recall == 0:
        return 0.0

    beta_squared = beta**2
    return (
        (1 + beta_squared)
        * (precision * recall)
        / ((beta_squared * precision) + recall)
    )
def get_available_metrics(ds: EvaluationDataset) -> list[Metric]:
    """
    Get the available metrics for the given dataset.

    E.g. if the dataset contains ("question", "answer", "contexts") columns,
    the available metrics are those that can be evaluated in [qa, qac, qc] mode.

    Args:
        ds: The evaluation dataset whose columns determine which
            metrics can be computed.

    Returns:
        The subset of ALL_METRICS whose required columns are all
        present in ``ds``.
    """
    available_metrics = []
    for metric in ALL_METRICS:
        try:
            # validate_required_columns raises ValueError when the
            # dataset lacks a column this metric requires.
            validate_required_columns(ds, [metric])
            available_metrics.append(metric)
        except ValueError:
            # Metric not evaluable on this dataset; skip it silently.
            pass

    return available_metrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters