-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathchoubun_metrics.py
61 lines (46 loc) · 2.28 KB
/
choubun_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import re
import string
from collections import Counter
from rouge import Rouge
def normalize_ja_answer(s):
    """Normalize a Japanese answer string before token comparison.

    The text is lower-cased, every ASCII, full-width and CJK punctuation
    character is dropped, and then all whitespace is removed (the pieces
    produced by ``str.split`` are re-joined with no separator).

    Args:
        s: The raw answer text.

    Returns:
        The normalized string; it is empty when the input holds nothing
        but punctuation and whitespace.
    """
    # The full-width form of each ASCII punctuation mark lives exactly
    # 0xFEE0 code points above it (U+FF01..U+FF5E). Deriving them avoids
    # embedding quote/hash look-alikes in a literal, where a width-normalised
    # copy of the file would silently end the string and comment out the rest.
    fullwidth_punctuation = "".join(
        chr(ord(ch) + 0xFEE0) for ch in string.punctuation
    )
    cjk_punctuation = (
        "。、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿"
        "–—‘’‛“”„‟…‧﹏⦅⦆"
        # Full-width white parentheses, then the half-width forms of 。「」、
        "\uff5f\uff60\uff61\uff62\uff63\uff64"
    )
    all_punctuation = set(
        string.punctuation + fullwidth_punctuation + cjk_punctuation
    )

    def white_space_fix(text):
        return "".join(text.split())

    def remove_punc(text):
        return "".join(ch for ch in text if ch not in all_punctuation)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_punc(lower(s)))
def rouge_score(prediction, ground_truth, **kwargs):
    """Return the ROUGE-L F-measure of ``prediction`` against ``ground_truth``.

    Args:
        prediction: Hypothesis text, handed to ``Rouge.get_scores`` as a
            one-element list.
        ground_truth: Reference text, handed over the same way.
        **kwargs: Accepted for signature compatibility and ignored.

    Returns:
        ``scores["rouge-l"]["f"]`` from the averaged result, or ``0.0`` when
        the scorer raises.
    """
    rouge = Rouge()
    try:
        scores = rouge.get_scores([prediction], [ground_truth], avg=True)
    except Exception:
        # Scoring is best-effort: any scorer failure counts as a zero score.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate so a long evaluation run can be stopped.
        return 0.0
    return scores["rouge-l"]["f"]
def rouge_ja_score(prediction, ground_truth, tokenizer, **kwargs):
    """Score Japanese text with ROUGE-L after tokenizing both sides.

    Args:
        prediction: Hypothesis text.
        ground_truth: Reference text.
        tokenizer: Callable that takes a string and yields objects exposing
            a ``surface`` attribute.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``rouge_score`` returns for the space-joined token surfaces.
    """

    def _space_joined(text):
        # Join the token surfaces with single spaces before scoring.
        return " ".join([piece.surface for piece in tokenizer(text)])

    hypothesis = _space_joined(prediction)
    reference = _space_joined(ground_truth)
    return rouge_score(hypothesis, reference)
def f1_score(prediction, ground_truth, **kwargs):
    """Compute the multiset-overlap F1 between two token sequences.

    Args:
        prediction: Sequence of hashable predicted tokens.
        ground_truth: Sequence of hashable reference tokens.
        **kwargs: Accepted and ignored.

    Returns:
        ``0`` when the sequences share no token; otherwise the harmonic mean
        of precision and recall as a float.
    """
    # Counter intersection keeps, per token, the smaller of the two counts.
    matched = sum((Counter(prediction) & Counter(ground_truth)).values())
    if not matched:
        return 0

    precision = 1.0 * matched / len(prediction)
    recall = 1.0 * matched / len(ground_truth)
    return (2 * precision * recall) / (precision + recall)
def qa_f1_ja_score(prediction, ground_truth, tokenizer, **kwargs):
    """Token-level F1 for Japanese QA answers.

    Both texts are tokenized, each token surface is passed through
    ``normalize_ja_answer``, tokens that normalize to the empty string are
    discarded, and the two remaining token lists are compared with
    ``f1_score``.

    Args:
        prediction: Predicted answer text.
        ground_truth: Reference answer text.
        tokenizer: Callable that takes a string and yields objects exposing
            a ``surface`` attribute.
        **kwargs: Accepted and ignored.

    Returns:
        The value returned by ``f1_score`` for the cleaned token lists.
    """

    def _clean_tokens(text):
        surfaces = [piece.surface for piece in tokenizer(text)]
        normalized = [normalize_ja_answer(surface) for surface in surfaces]
        # Drop tokens that were nothing but punctuation or whitespace.
        return [tok for tok in normalized if tok]

    predicted = _clean_tokens(prediction)
    expected = _clean_tokens(ground_truth)
    return f1_score(predicted, expected)