environment.py
from state import State
from coqinterface import CoqInterface
from definitions import Poset, Lattice, LatticeLemmas
from actions import Undo


class Environment:
    def __init__(self, theorem):
        self.coqinterface = None
        self.theorem = theorem
        self.reset()
    def reset(self):
        # Restart the Coq process and rebuild the proof context from scratch.
        if self.coqinterface:
            self.coqinterface.exit()
        self.state = State()
        self.coqinterface = CoqInterface(env=self)
        self.load_definitions(Poset, Lattice)
        self.start_section()
        self.load_definitions(LatticeLemmas)
        self.start_theorem()
        self.num_subgoals = 1
    def load_definitions(self, *defs):
        # Send the Coq definitions (e.g. posets and lattices) to the prover.
        for definition in defs:
            self.coqinterface.sendone(definition.to_coq())

    def start_section(self):
        self.coqinterface.sendone("Section Lattice. Context `{L : Lattice A}.")

    def start_theorem(self):
        response = self.coqinterface.sendone(self.theorem.to_coq())
        self.update_subgoals(response)
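
    # update_subgoals below assumes coqtop-style output. An illustrative
    # response (the actual goal text depends on the theorem being proved):
    #
    #   2 subgoals
    #
    #     ============================
    #     meet a b = meet b a
    #
    #   subgoal 2 is:
    #     join a b = join b a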
    def update_subgoals(self, response):
        # Parse coqtop's response to recover the current goal and any
        # remaining subgoals, then record them on the state.
        all_subgoals = []
        if "============================" in response:
            num_subgoals = int(response.split(" subgoal")[0].split("\r\n")[-1].strip())
            self.state.current_subgoal = response.split("============================")[-1].split("\r\n")[1].strip()
            all_subgoals.append(self.state.current_subgoal)
            # collect the subgoals beyond the first
            if num_subgoals > 1:
                for i in range(2, num_subgoals + 1):
                    subgoal = response.split("subgoal {} is:".format(i))[-1].split("\r\n")[1].strip()
                    all_subgoals.append(subgoal)
            # store a canonical (sorted) view of the open subgoals
            self.state.all_subgoals = tuple(sorted(all_subgoals))
            self.state.num_subgoals = num_subgoals
    def step(self, action):
        reward = 0
        to_undo = False
        # Don't allow an undo when the list of past actions is empty.
        if self.state.past_actions == [] and isinstance(action, Undo):
            raise Exception("Tried to undo the first action, so this proof is impossible to prove given the actions provided.")
        # Send the tactic to Coq.
        response = self.coqinterface.sendone(action.to_coq())
        # Record that we attempted this action from the current subgoal.
        if not isinstance(action, Undo):
            self.state.add_attempted_action(action)
        # Update the subgoals according to the response.
        old_num_subgoals = self.state.num_subgoals
        self.update_subgoals(response)
        # If it was an undo, we're done -- no reward to determine.
        if isinstance(action, Undo):
            # Drop the last visited subgoal so the chain only contains
            # subgoals on the current path.
            self.state.visited_subgoals.pop()
            return self.state, 0, self.state.done, to_undo
        # The action was accepted by Coq without error.
        if "Error" not in response:
            reward = 10  # slight positive reward for not producing an error
            # Append the action to the state's list of actions.
            self.state = self.state.get_copy()
            self.state.append_action(action)
            # Try closing the proof; the interface flags state.done if Qed succeeds.
            self.coqinterface.sendone("Qed.")
            if self.state.done:
                # Much bigger reward for the step that finishes the proof.
                reward += 100
            else:
                # Bigger reward if the number of subgoals was reduced.
                if self.state.num_subgoals < old_num_subgoals:
                    reward += 10
                # make reward smaller if number of subgoals increases
                # elif self.state.num_subgoals > old_num_subgoals:
                #     reward -= 0
                # Penalize revisiting a state: it puts us in a loop, and the
                # caller can simply undo it.
                if self.state.all_subgoals in self.state.visited_subgoals:
                    reward = -20
                    to_undo = True
                # Record the visited subgoals (popped again on the undo step).
                self.state.visited_subgoals.append(self.state.all_subgoals)
        else:
            reward = -1000  # big negative reward for an erroneous tactic
        return self.state, reward, self.state.done, to_undo
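

if __name__ == "__main__":
    # Hypothetical usage sketch, not part of the original module: drive the
    # environment with a uniformly random agent. `Theorem` and `ALL_ACTIONS`
    # are assumed names for a theorem wrapper in definitions.py and a list of
    # available tactics in actions.py; adjust them to match the codebase.
    import random
    from definitions import Theorem
    from actions import ALL_ACTIONS

    env = Environment(Theorem("meet_comm", "forall a b : A, meet a b = meet b a"))
    done = False
    while not done:
        state, reward, done, to_undo = env.step(random.choice(ALL_ACTIONS))
        if to_undo and not done:
            # The environment flagged a revisited state, so back out of it.
            state, reward, done, _ = env.step(Undo())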