-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathSuperMarioWorldAI-NEAT.py
175 lines (140 loc) · 5.74 KB
/
SuperMarioWorldAI-NEAT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import retro
import numpy as np
##pip install opencv-python to install cv2
import cv2
import neat
import pickle
# Play this retro game at this level.
env = retro.make('SuperMarioWorld-Snes', 'DonutPlains1.state')
imgarray = []
def eval_genomes(genomes, config):
for genome_id, genome in genomes:
ob = env.reset()
ac = env.action_space.sample() # action with generic sample
iny, inx, inc = env.observation_space.shape
inx = int(inx/8)
iny = int(iny/8)
# Create a Recurrent Neural Network.
net = neat.nn.recurrent.RecurrentNetwork.create(genome, config)
current_max_fitness = 0
fitness_current = 0
frame = 0
counter = 0
score = 0
scoreTracker = 0
coins = 0
coinsTracker = 0
yoshiCoins = 0
yoshiCoinsTracker = 0
xPosPrevious = 0
yPosPrevious = 0
checkpoint = False
checkpointValue = 0
endOfLevel = 0
powerUps = 0
powerUpsLast = 0
jump = 0
done = False
while not done:
env.render()
frame += 1
ob = cv2.resize(ob, (inx, iny))
ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
ob = np.reshape(ob, (inx, iny))
imgarray = ob.flatten()
nnOutput = net.activate(imgarray)
ob, rew, done, info = env.step(nnOutput)
score = info['score']
coins = info['coins']
yoshiCoins = info['yoshiCoins']
dead = info['dead']
xPos = info['x']
yPos = info['y']
jump = info['jump']
checkpointValue = info['checkpoint']
endOfLevel = info['endOfLevel']
powerUps = info['powerups']
# Add to fitness score if mario gains points on his score.
if score > 0:
if score > scoreTracker:
fitness_current = (score * 10)
scoreTracker = score
# Add to fitness score if mario gets more coins.
if coins > 0:
if coins > coinsTracker:
fitness_current += (coins - coinsTracker)
coinsTracker = coins
# Add to fitness score if marioe gets more yoshi coins.
if yoshiCoins > 0:
if yoshiCoins > yoshiCoinsTracker:
fitness_current += (yoshiCoins - yoshiCoinsTracker) * 10
yoshiCoinsTracker = yoshiCoins
# As mario moves right, reward him slightly.
if xPos > xPosPrevious:
if jump > 0:
fitness_current += 10
fitness_current += (xPos / 100)
xPosPrevious = xPos
counter = 0
# If mario is standing still or going backwards, penalize him slightly.
else:
counter += 1
fitness_current -= 0.1
# Award mario slightly for going up higher in the y position (y pos is inverted).
if yPos < yPosPrevious:
fitness_current += 10
yPosPrevious = yPos
elif yPos < yPosPrevious:
yPosPrevious = yPos
# If mario loses a powerup, punish him 1000 points.
if powerUps == 0:
if powerUpsLast == 1:
fitness_current -= 500
print("Lost Upgrade")
# If powerups is 1, mario got a mushroom...reward him for keeping it.
elif powerUps == 1:
if powerUpsLast == 1 or powerUpsLast == 0:
fitness_current += 0.025
elif powerUpsLast == 2:
fitness_current -= 500
print("Lost Upgrade")
# If powerups is 2, mario got a cape feather...reward him for keeping it.
elif powerUps == 2:
fitness_current += 0.05
powerUpsLast = powerUps
# If mario doesn't get any rewards for 1000 frames or move forward, then he finishes.
#if fitness_current > current_max_fitness:
#current_max_fitness = fitness_current
#counter = 0
#else:
#counter += 1
# If mario reaches the checkpoint (located at around xpos == 2425) then give him a huge bonus.
if checkpointValue == 1 and checkpoint == False:
fitness_current += 20000
checkpoint = True
# If mario reaches the end of the level, award him automatic winner.
if endOfLevel == 1:
fitness_current += 1000000
done = True
# If mario is standing still or going backwards for 1000 frames, end his try.
if counter == 1000:
fitness_current -= 125
done = True
# If mario dies, dead becomes 0, so when it is 0, penalize him and move on.
if dead == 0:
fitness_current -= 100
done = True
if done == True:
print(genome_id, fitness_current)
genome.fitness = fitness_current
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
neat.DefaultSpeciesSet, neat.DefaultStagnation,
'config-feedforward')
p = neat.Population(config)
p.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
p.add_reporter(stats)
p.add_reporter(neat.Checkpointer(10))
winner = p.run(eval_genomes)
with open('winner.pkl', 'wb') as output:
pickle.dump(winner, output, 1)