-
Notifications
You must be signed in to change notification settings - Fork 1
/
strip_weights.py
99 lines (87 loc) · 2.24 KB
/
strip_weights.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/python3
import regex
# Matches one grammar rule of the form `lhs ::= rhs`. The rhs runs to the end
# of the line and may continue over following lines that start (after
# optional whitespace) with `|`. Named groups: `lhs` and `rhs`.
#
# Written as a raw string so that `\w`, `\s`, `\|` and `\n` reach the regex
# engine untouched instead of being (mis)interpreted as Python string
# escapes; unrecognised escapes in a plain literal emit SyntaxWarning on
# Python 3.12+.
#
# The pattern deliberately does NOT consume the newline that ends the rule:
# modify_rule's replacement text carries no trailing newline, so swallowing
# it here would glue consecutive rules onto one line.
rule_regexp = regex.compile(r"""
(?P<lhs>\w+) \s* # lhs
# (?:
# ->
# (?<type>.*)
# )?
::= \s*
(?P<rhs> # rhs
    .+
    (\n\s*\|.+)*
)
""", regex.VERBOSE)
# Matches a single space sitting between two adjacent grammar terms; each
# such space is replaced with ' ~ ' by modify_rule.
#   - the character before the space must be a word character or one of
#     ) ' " + * ?
#   - the character after the space must be a word character or one of
#     ( ' " & !
# Raw string: keeps the regex escapes (`\w`, `\)`, `\+`, ...) out of Python's
# own escape processing, which warns about them on Python 3.12+.
concat_regexp = regex.compile(r"""
(?<=
    [\w\)'"\+\*\?]
)
[ ] # space
(?=
    [\w\('"&!]
)
""", regex.VERBOSE)
# Matches the separated-repetition shorthand  ITEM % "SEP" OP  where
#   - ITEM (group `string`) is an identifier or a balanced parenthesised
#     group, matched recursively through the named group `rec`;
#   - "SEP" (group `separator`) is a non-empty double-quoted string;
#   - OP (group `operator`) is `+` or `*`.
# modify_seq reads the three named groups to build the replacement.
#
# Raw string: the parentheses are escaped as `\(` / `\)` directly, and the
# remaining regex escapes (`\+`, `\*`) no longer depend on Python passing
# unrecognised string escapes through (SyntaxWarning on Python 3.12+).
# Requires the third-party `regex` module: `(?&rec)` recursion and the
# `(?<name>...)` group syntax are not supported by the stdlib `re`.
separator_regexp = regex.compile(r"""
(?<string>
    [a-zA-Z_][a-zA-Z0-9_]* |
    (?<rec> #capturing group rec
        \( #open parenthesis
        (?: #non-capturing group
            [^()]++ #anyting but parenthesis one or more times without backtracking
            | #or
            (?&rec) #recursive substitute of group rec
        )*
        \) #close parenthesis
    )
)
% (?<separator>
    "[^"]+"
)
(?<operator>\+|\*)
""", regex.VERBOSE)
# Matches a single-quoted token declaration:  'TEXT' -> NAME
# optionally followed by  : WORD {BODY}
# Positional groups: 1 = TEXT, 2 = NAME, 3 = WORD, 4 = BODY (groups 3 and 4
# are None when the optional tail is absent).
# Raw string: `\s` and `\w` are regex escapes, not Python string escapes;
# in a plain literal they emit SyntaxWarning on Python 3.12+.
token_regexp = regex.compile(r"""
'(.+)' \s*
-> \s*
(\w+) \s*
(?:
    : \s*
    (\w+) \s*
    {(.*)}
)?
""", regex.VERBOSE)
# Matches a slash-delimited token declaration:  /PATTERN/ -> NAME
# optionally followed by  : WORD {BODY}
# Positional groups: 1 = PATTERN, 2 = NAME, 3 = WORD, 4 = BODY (groups 3 and
# 4 are None when the optional tail is absent).
# Raw string: `\/`, `\s` and `\w` are regex escapes, not Python string
# escapes; in a plain literal they emit SyntaxWarning on Python 3.12+.
token_regexp_regexp = regex.compile(r"""
\/(.+)\/ \s*
-> \s*
(\w+) \s*
(?:
    : \s*
    (\w+) \s*
    {(.*)}
)?
""", regex.VERBOSE)
# Matches a line of the form  `#start IDENT`  (optional leading whitespace,
# at least one whitespace character before IDENT) and captures IDENT as
# group 1. MULTILINE lets `^` anchor at the beginning of every line.
# Raw string: `\s` is a regex escape, not a Python string escape; in a plain
# literal it emits SyntaxWarning on Python 3.12+.
start_decl_regexp = regex.compile(r"""
^ \s* [#] start \s+ ([a-zA-Z_][a-zA-Z0-9_]*)
""", regex.VERBOSE | regex.MULTILINE)
def modify_seq(match):
    """Expand one separated-repetition match into explicit `~` sequences.

    Reads the named groups `string`, `separator` and `operator` from
    *match* and returns the replacement text:

    * operator `+` gives `(string ~ separator)* ~ string`
    * operator `*` gives the same expression wrapped as `(...)?`

    Raises:
        RuntimeError: if the operator is neither `+` nor `*`.
    """
    item, sep, op = match.group('string', 'separator', 'operator')
    # The `*` form is just the `+` form made optional as a whole.
    at_least_one = f'({item} ~ {sep})* ~ {item}'
    if op == '+':
        return at_least_one
    if op == '*':
        return f'({at_least_one})?'
    raise RuntimeError(f'Invalid seq operator: {op}, expected: + or *')
def modify_rule(match):
    """Build the replacement text for one rule matched by rule_regexp.

    Takes the `rhs` group of *match*, replaces every space matched by
    concat_regexp with ' ~ ', then expands every separator_regexp match
    through modify_seq. Returns `lhs ::= rhs` with the rewritten rhs and
    no trailing newline.
    """
    body = concat_regexp.sub(' ~ ', match.group('rhs'))
    # Runs after the concatenation pass, so it sees the already-rewritten text.
    body = separator_regexp.sub(modify_seq, body)
    return f"{match.group('lhs')} ::= {body}"
# Script body: load grammar.pest, rewrite every rule found by rule_regexp
# via modify_rule, and save the result as unweight_grammar.pest. The output
# file is only opened once the rewrite has finished.
with open('grammar.pest') as source:
    grammar_str = source.read()
grammar_str = rule_regexp.sub(modify_rule, grammar_str)
with open('unweight_grammar.pest', mode='w') as destination:
    destination.write(grammar_str)