Skip to content

Commit

Permalink
test calculate_win_rate()
Browse files Browse the repository at this point in the history
  • Loading branch information
hkiyomaru committed Dec 28, 2023
1 parent 79e00cc commit 25694b1
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
5 changes: 4 additions & 1 deletion llm_judge/show_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ def calculate_win_rate(results: list[dict]):
num_win_2 = 0
num_tie = 0
for result in results:
if result["g1_winner"] == "tie" or result["g1_winner"] != result["g2_winner"]:
if (
"tie" in {result["g1_winner"], result["g2_winner"]}
or result["g1_winner"] != result["g2_winner"]
):
num_tie += 1
elif result["g1_winner"] == "model_1":
num_win_1 += 1
Expand Down
38 changes: 37 additions & 1 deletion tests/test_show_result.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest

from llm_judge.show_result import calculate_average_score
from llm_judge.show_result import calculate_average_score, calculate_win_rate


class TestCalculateAverageScore(unittest.TestCase):
Expand All @@ -10,3 +10,39 @@ def test_calculate_average_score(self):

results = [{"score": 1}, {"score": 2}, {"score": 3}, {"score": 4}]
self.assertEqual(calculate_average_score(results), 2.5)


class TestCalculateWinRate(unittest.TestCase):
    """Tests for ``calculate_win_rate`` on single-result inputs."""

    def test_calculate_win_rate(self):
        """Check the reported rates for agreement, disagreement, and a tie.

        Every case passes a one-element result list to
        ``calculate_win_rate`` and compares the returned mapping with the
        expected ``win_rate`` / ``adjusted_win_rate`` values per model.
        """
        cases = [
            (
                "both winners are model_1",
                {"g1_winner": "model_1", "g2_winner": "model_1"},
                {
                    "model_1": {"win_rate": 1.0, "adjusted_win_rate": 1.0},
                    "model_2": {"win_rate": 0.0, "adjusted_win_rate": 0.0},
                },
            ),
            (
                "winners disagree",
                {"g1_winner": "model_1", "g2_winner": "model_2"},
                {
                    "model_1": {"win_rate": 0.0, "adjusted_win_rate": 0.5},
                    "model_2": {"win_rate": 0.0, "adjusted_win_rate": 0.5},
                },
            ),
            (
                "g2_winner is a tie",
                {"g1_winner": "model_1", "g2_winner": "tie"},
                {
                    "model_1": {"win_rate": 0.0, "adjusted_win_rate": 0.5},
                    "model_2": {"win_rate": 0.0, "adjusted_win_rate": 0.5},
                },
            ),
        ]

        for description, result, expected in cases:
            # subTest keeps the scenarios independent: a failure in one is
            # reported with its description and the remaining cases still run.
            with self.subTest(msg=description):
                self.assertEqual(calculate_win_rate([result]), expected)

0 comments on commit 25694b1

Please sign in to comment.