forked from jsikyoon/a3c-distributed_tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
game_state.py
executable file
·88 lines (66 loc) · 2.33 KB
/
game_state.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
import sys
import numpy as np
import cv2
from ale_python_interface import ALEInterface
from constants import ROM
from constants import ACTION_SIZE
class GameState(object):
def __init__(self, rand_seed, display=False, no_op_max=7):
self.ale = ALEInterface()
self.ale.setInt(b'random_seed', rand_seed)
self.ale.setFloat(b'repeat_action_probability', 0.0)
self.ale.setBool(b'color_averaging', True)
self.ale.setInt(b'frame_skip', 4)
self._no_op_max = no_op_max
if display:
self._setup_display()
self.ale.loadROM(ROM.encode('ascii'))
# collect minimal action set
self.real_actions = self.ale.getMinimalActionSet()
# height=210, width=160
self._screen = np.empty((210, 160, 1), dtype=np.uint8)
self.reset()
def _process_frame(self, action, reshape):
reward = self.ale.act(action)
terminal = self.ale.game_over()
# screen shape is (210, 160, 1)
self.ale.getScreenGrayscale(self._screen)
# reshape it into (210, 160)
reshaped_screen = np.reshape(self._screen, (210, 160))
# resize to height=110, width=84
resized_screen = cv2.resize(reshaped_screen, (84, 110))
x_t = resized_screen[18:102,:]
if reshape:
x_t = np.reshape(x_t, (84, 84, 1))
x_t = x_t.astype(np.float32)
x_t *= (1.0/255.0)
return reward, terminal, x_t
def _setup_display(self):
if sys.platform == 'darwin':
import pygame
pygame.init()
self.ale.setBool(b'sound', False)
elif sys.platform.startswith('linux'):
self.ale.setBool(b'sound', True)
self.ale.setBool(b'display_screen', True)
def reset(self):
self.ale.reset_game()
# randomize initial state
if self._no_op_max > 0:
no_op = np.random.randint(0, self._no_op_max + 1)
for _ in range(no_op):
self.ale.act(0)
_, _, x_t = self._process_frame(0, False)
self.reward = 0
self.terminal = False
self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)
def process(self, action):
# convert original 18 action index to minimal action set index
real_action = self.real_actions[action]
r, t, x_t1 = self._process_frame(real_action, True)
self.reward = r
self.terminal = t
self.s_t1 = np.append(self.s_t[:,:,1:], x_t1, axis = 2)
def update(self):
self.s_t = self.s_t1