-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathguessbygenerating.py
71 lines (61 loc) · 2.33 KB
/
guessbygenerating.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# guessbygenerating.py
copyright = """Copyright © 2017, Kimmo Koskenniemi
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import hfst, sys, argparse
# Command-line interface: two positional arguments (guesser FST file and
# two-level rule file) plus an integer verbosity option.
argparser = argparse.ArgumentParser(
    # First positional parameter of ArgumentParser is `prog`, the name shown
    # in usage/error messages; it must match this script's file name.
    "python3 guessbygenerating.py",
    description="Guess lexicon entries from generated forms of them")
# NOTE: argparse only applies `default` to a positional when it is optional
# (nargs="?" or "*").  As declared here the argument is required, so the
# default below is never used and the guesser file must always be given.
argparser.add_argument(
    "guesser", help="Guesser file FST", default="ofi-guess-n.fst")
# NOTE(review): args.rules is parsed but not referenced anywhere in the
# visible part of this script -- confirm whether `generate` needs it.
argparser.add_argument(
    "rules",
    help="name of the two-level rule file")
argparser.add_argument(
    "-v", "--verbosity", default=0, type=int,
    help="level of diagnostic output")
args = argparser.parse_args()
# Read the first transducer from the guesser file named on the command line.
guesser_fil = hfst.HfstInputStream(args.guesser)
try:
    guesser_fst = guesser_fil.read()
finally:
    # Close the stream even when read() fails, so the file handle is not
    # left open while the exception propagates.
    guesser_fil.close()
# Inversion is deliberately left disabled, as in the original script.
#guesser_fst.invert()
# Minimize the transducer and convert it to the lookup-optimized form
# before it is queried once per input line.
guesser_fst.minimize()
guesser_fst.lookup_optimize()
import sys, re
import generate
# Suffix strings appended to a guessed stem before it is passed to
# generate.generate(), keyed by the second field of a guesser entry.
suf = {"/s": ["", "n", "{nrs}{aä}", "{ij}{Øt}{aä}"]}

print()
# Each stdin line is one word form: look it up in the guesser, list the
# best-weighted candidate entries and, for each, the word forms generated
# from the candidate stem plus the suffixes of its class.
for line_nl in sys.stdin:
    line = line_nl.strip()
    res = guesser_fst.lookup(line, output="tuple")
    if args.verbosity >= 10:
        print("lookup result =", res)
    if not res:
        # min() below raises ValueError on an empty result, which used to
        # abort the whole run on the first word the guesser rejects.
        # Report it on stderr and continue with the next input line.
        print("no guesses for:", line, file=sys.stderr)
        print()
        continue
    # Keep only candidates whose weight is less than 10 above the best one.
    best_w = min(w for _, w in res)
    candidates = []
    for entry, w in res:
        if w < best_w + 10:
            # assumes every entry is "STEM CLASS" separated by exactly one
            # space; any other shape raises ValueError -- TODO confirm
            stem, cont = entry.split(" ")
            candidates.append((stem, cont, w))
    for i, (stem, cont, weight) in enumerate(candidates, start=1):
        # NOTE(review): "{:.2}" without a type code gives two significant
        # digits for a float (12.5 -> "1.2e+01"); "{:.2f}" may have been
        # intended.  Left unchanged to keep the output format stable.
        print("({}) {} {} ; {:.2}".format(i, stem, cont, weight))
        word_lst = []
        # A class with no entry in `suf` yields no generated forms.
        for suffix in suf.get(cont, ()):
            for r in generate.generate(stem + suffix):
                # Join the pieces of one result and drop the "Ø" symbols.
                word_lst.append("".join(r).replace("Ø", ""))
        print(" ", " ".join(word_lst))
    print()