Skip to content

Commit

Permalink
use exactly the same code to determine the winner in common.py an…
Browse files Browse the repository at this point in the history
…d reparse_pairwise_judgement.py
  • Loading branch information
hkiyomaru committed Dec 27, 2023
1 parent 7b2d938 commit 9049ba1
Showing 2 changed files with 29 additions and 28 deletions.
41 changes: 21 additions & 20 deletions llm_judge/common.py
Original file line number Diff line number Diff line change
@@ -167,26 +167,27 @@ def play(answer_a, answer_b):
}
if self.ref_answer is not None:
kwargs["ref_answer_1"] = self.ref_answer["choices"][0]["turns"][0]
judgment = self.judge.judge(**kwargs)

if "[[A]]" in judgment:
winner = "A"
elif "[[B]]" in judgment:
winner = "B"
elif "[[C]]" in judgment:
winner = "tie"
else:
winner = "error"

return winner, judgment

g1_winner, g1_judgment = play(self.answer_1, self.answer_2)
winner_map = {"A": "model_1", "B": "model_2"}
g1_winner = winner_map.get(g1_winner, g1_winner)

g2_winner, g2_judgment = play(self.answer_2, self.answer_1)
winner_map = {"A": "model_2", "B": "model_1"}
g2_winner = winner_map.get(g2_winner, g2_winner)
return self.judge.judge(**kwargs)

g1_judgment = play(self.answer_1, self.answer_2)
if "[[A]]" in g1_judgment:
g1_winner = "model_1"
elif "[[B]]" in g1_judgment:
g1_winner = "model_2"
elif "[[C]]" in g1_judgment:
g1_winner = "tie"
else:
g1_winner = "error"

g2_judgment = play(self.answer_2, self.answer_1)
if "[[A]]" in g2_judgment:
g2_winner = "model_2"
elif "[[B]]" in g2_judgment:
g2_winner = "model_1"
elif "[[C]]" in g2_judgment:
g2_winner = "tie"
else:
g2_winner = "error"

result = {
"model_1": self.model_1,
16 changes: 8 additions & 8 deletions llm_judge/reparse_pairwise_judgement.py
Original file line number Diff line number Diff line change
@@ -15,23 +15,23 @@ def reparse_result_pairwise(result: dict) -> dict:
"""
reparsed_result = result.copy()

g1_judgement = result["g1_judgment"]
if "[[A]]" in g1_judgement:
g1_judgment = result["g1_judgment"]
if "[[A]]" in g1_judgment:
g1_winner = "model_1"
elif "[[B]]" in g1_judgement:
elif "[[B]]" in g1_judgment:
g1_winner = "model_2"
elif "[[C]]" in g1_judgement:
elif "[[C]]" in g1_judgment:
g1_winner = "tie"
else:
g1_winner = "error"
reparsed_result["g1_winner"] = g1_winner

g2_judgement = result["g2_judgment"]
if "[[A]]" in g2_judgement:
g2_judgment = result["g2_judgment"]
if "[[A]]" in g2_judgment:
g2_winner = "model_2"
elif "[[B]]" in g2_judgement:
elif "[[B]]" in g2_judgment:
g2_winner = "model_1"
elif "[[C]]" in g2_judgement:
elif "[[C]]" in g2_judgment:
g2_winner = "tie"
else:
g2_winner = "error"

0 comments on commit 9049ba1

Please sign in to comment.