Skip to content

Commit

Permalink
small board with timer
Browse files: browse the repository at this point in the history
  • Loading branch information
wenxinxu committed Mar 17, 2018
1 parent f11d618 commit 5bb80a3
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
6 changes: 3 additions & 3 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class Config(object):
# maintains past 2.5 games worth of turns if num_episodes=500 and rings=19
# maintains past 1 game worth of turns if num_episodes=500 and rings=37
# buffer_size = 100000
buffer_size = 100000
buffer_size = 150000
epochs = 50
lr = 0.01

Expand All @@ -30,8 +30,8 @@ class Config(object):
# MCTS settings
# 100 sims and 1000 episodes ~ 37 seconds per episode / 13 days for 30 iters
# 25 sims and 500 episodes ~ 9 seconds per episode / 1.5 days for 30 iters
num_episodes = 500
num_sims = 25
num_episodes = 750
num_sims = 40
c_puct = 1
# Should be set based on game length to encourage exploration in early moves
temp_threshold = 6
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
rings = 19
marbles = {'w': 10, 'g': 10, 'b': 10}
win_con = [{'w': 2}, {'g': 2}, {'b': 2}, {'w': 1, 'g': 1, 'b': 1}]
t = 3
t = 5

# Setup
game = Game(rings, marbles, win_con, t)
Expand Down
9 changes: 8 additions & 1 deletion retrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from mcts import MCTS
from selfplay import SelfPlay, Arena
import time

class Coach(object):
def __init__(self, game, model, config):
Expand Down Expand Up @@ -82,8 +83,14 @@ def learn(self):
# Step 1. Generate training examples by self play with current model
self_play = SelfPlay(self.game, self.model)
new_examples = []
for _ in range(self.config.num_episodes):
for i in range(self.config.num_episodes):
start = time.time()
new_examples += self_play.generate_play_data()
now = time.time() - start

if i % 100 == 0:
print 'Time to generate an episode = %i s' %now

random.shuffle(new_examples)
self.example_buffer.extend(new_examples)
training_examples = self.examples_to_array(self.example_buffer)
Expand Down

0 comments on commit 5bb80a3

Please sign in to comment.