-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathssyn2es.py
36 lines (31 loc) · 1.4 KB
/
ssyn2es.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Copyright (c) 2017-2020 Works Applications Co., Ltd.
# Licensed under the Apache License Version 2.0, see
# https://github.com/WorksApplications/elasticsearch-sudachi/blob/828ee7cc991becf3818cbbf7d764612d495ec276/docs/ssyn2es.py
import argparse
import fileinput
def main():
    """Convert Sudachi synonym lines into ES synonym rules on stdout.

    Reads the files named on the command line (stdin when none are given),
    groups the entries by their first field, and prints one rule per group.

    Command line:
        FILE ...                 files to read; stdin is used when empty
        -p, --output-predicate   also keep entries whose second field is "2"

    Raises:
        ValueError: if a non-blank input line has fewer than nine
            comma-separated fields.
    """
    parser = argparse.ArgumentParser(prog="ssyn2es.py", description="convert Sudachi synonyms to ES")
    parser.add_argument('files', metavar='FILE', nargs='*', help='files to read, if empty, stdin is used')
    parser.add_argument('-p', '--output-predicate', action='store_true', help='output predicates')
    args = parser.parse_args()

    # Bound to `source` rather than `input` so the builtin is not shadowed.
    with fileinput.input(files=args.files) as source:
        synonyms = _collect_groups(source, args.output_predicate)

    for rule in _format_rules(synonyms):
        print(rule)


def _collect_groups(lines, output_predicate):
    """Group the ninth field (headword) of each usable line by group id.

    Args:
        lines: iterable of raw text lines; blank lines are ignored.
        output_predicate: when false, entries whose second field is "2"
            are dropped (the ``--output-predicate`` switch).

    Returns:
        dict mapping group id (first field) to a two-element list
        ``[expandable, target_only]``: headwords whose third field is not
        "1", and headwords whose third field is "1", in input order.

    Raises:
        ValueError: if a non-blank line has fewer than nine fields.
    """
    min_fields = 9  # the headword is read from the ninth field (index 8)
    synonyms = {}
    for lineno, raw in enumerate(lines, 1):
        line = raw.strip()
        if not line:
            continue
        entry = line.split(",")[:min_fields]
        if len(entry) < min_fields:
            # Fail with context instead of an anonymous IndexError.
            raise ValueError(
                "line {}: expected at least {} comma-separated fields, got {}: {!r}".format(
                    lineno, min_fields, len(entry), line))
        group_id, predicate_flag, expand_flag, headword = entry[0], entry[1], entry[2], entry[8]
        # Third field "2" is always dropped; second field "2" is dropped
        # unless predicates were requested.
        if expand_flag == "2" or (not output_predicate and predicate_flag == "2"):
            continue
        group = synonyms.setdefault(group_id, [[], []])
        group[1 if expand_flag == "1" else 0].append(headword)
    return synonyms


def _format_rules(synonyms):
    """Yield one rule string per group, ordered by group id (string sort).

    A group without target-only words yields ``a,b,c`` when it has at least
    two words. A group with target-only words yields ``a,b=>a,b,x`` when it
    also has at least one expandable word. Other groups yield nothing.
    """
    for group_id in sorted(synonyms):
        expandable, target_only = synonyms[group_id]
        if not target_only:
            if len(expandable) > 1:
                yield ",".join(expandable)
        elif expandable:
            # Target-only words appear only on the right-hand side.
            yield ",".join(expandable) + "=>" + ",".join(expandable + target_only)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()