-
Notifications
You must be signed in to change notification settings - Fork 2
/
chiasmus_example.py
47 lines (38 loc) · 1.28 KB
/
chiasmus_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from chiasmus import ChiasmusDetector
import sys
def main(argv=None):
    """Train a chiasmus detector and rank chiasmus candidates in one text.

    Steps, in order:

    1. Build a ``ChiasmusDetector`` configured with German resources
       (``wiki.de.bin`` fastText model, ``de_core_news_lg`` spaCy model,
       German conjunction and negation lists).
    2. Call ``train_with_crossval`` on the example data with ``num_runs=5``
       and print the detector's summary.
    3. Call ``train`` on the same file with ``keep_model=True``.
    4. Run the pipeline on ``gerdracor/<name>.txt``.
    5. Call ``get_top`` on ``candidates/<name>.txt.pkl`` with
       ``results.json`` and ``100``.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first entry is the base name of the text,
            i.e. its file name without the ``.txt`` suffix; any further
            entries are ignored.

    Raises:
        SystemExit: If no text name was supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit(
            "usage: chiasmus_example.py <text_name>  "
            "(processes gerdracor/<text_name>.txt)"
        )
    fn = args[0]

    # Both training calls read the same annotated file.
    training_file = 'data_example/data.json'
    # Passed to the pipeline and reused to build the path given to get_top,
    # so the two cannot drift apart.
    candidates_folder = "candidates"

    print('initialize detector')
    chidect = ChiasmusDetector(
        fasttext_model='./fasttext_models/wiki.de.bin',
        feature_types=['dubremetz', 'lexical', 'embedding'],
        conjlist=["und", "so", "weil", "weder", "noch", "aber", "für", "dennoch"],
        neglist=["nein", "nicht", "niemals", "nichts"],
        pos_blacklist=["SPACE", "PUNCT", "PROPN", "DET"],
        spacy_model='de_core_news_lg',
    )

    print('train with crossvalidation')
    chidect.train_with_crossval(
        training_file=training_file,
        num_runs=5,
    )
    chidect.print_summary()

    print('train on whole dataset')
    chidect.train(
        training_file=training_file,
        keep_model=True,
    )

    print('find chaismi in new text')
    chidect.run_pipeline_on_text(
        filename=f'{fn}.txt',
        text_folder="gerdracor",
        processed_folder="processed",
        candidates_folder=candidates_folder,
        id_start="test_",
    )

    # NOTE(review): presumably the pipeline call above writes this pickle and
    # get_top stores the 100 best-scored candidates in results.json -- confirm
    # against the ChiasmusDetector implementation.
    chidect.get_top(
        f'{candidates_folder}/{fn}.txt.pkl',
        'results.json',
        100)
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()