We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ff1c6a2 · commit b57efb4 · Copy full SHA for b57efb4
ga3c/ProcessAgent.py
@@ -107,11 +107,8 @@ def run_episode(self):
107
terminal_reward = 0 if done else value
108
109
updated_exps = ProcessAgent._accumulate_rewards(experiences, self.discount_factor, terminal_reward)
110
- if len(updated_exps) == 0:
111
- yield None, None, None, total_reward
112
- else:
113
- x_, r_, a_ = self.convert_data(updated_exps)
114
- yield x_, r_, a_, reward_sum
+ x_, r_, a_ = self.convert_data(updated_exps)
+ yield x_, r_, a_, reward_sum
115
116
# reset the tmax count
117
time_count = 0
@@ -131,8 +128,6 @@ def run(self):
131
128
total_length = 0
132
129
for x_, r_, a_, reward_sum in self.run_episode():
133
130
total_reward += reward_sum
134
- if x_ is None:
135
- break
136
total_length += len(r_) + 1 # +1 for last frame that we drop
137
self.training_q.put((x_, r_, a_))
138
self.episode_log_q.put((datetime.now(), total_reward, total_length))
0 commit comments