
Commit 18e9dde

fix bugs and make it work :) fat model, but it's okay on avg.
1 parent 3a2d4c4 commit 18e9dde

7 files changed: +87 / −77 lines


CMazeExperience.py

Lines changed: 0 additions & 8 deletions
@@ -13,13 +13,6 @@ def __init__(self, maxSize):
   def addEpisode(self, replay):
     score = sum(x[2] for x in replay)
     if score < self.minScore: return
-
-    # for i in range(len(replay)):
-    #   state, act, score, nextState = replay[i]
-    #   gamma = self.gamma
-    #   for j in range(i + 1, len(replay)):
-    #     score += gamma * replay[j][2]
-    #     gamma *= self.gamma
     self.episodes.append((replay, score))

     if self.sizeLimit < len(self.episodes):
@@ -57,7 +50,6 @@ def take_batch(self, batch_size):
         nextStateWeight = 1 if ind < len(episode) - 1 else 0
         batch.append((state, act, score, nextState, nextStateWeight))

-
     return (
       np.array([x[0] for x in batch]),
       np.array([x[1] for x in batch]),
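
Note: the `nextStateWeight` flag in `take_batch` is what marks terminal transitions, so the Bellman target built in train.py collapses to the raw reward at episode ends. A minimal numeric sketch of that masking (all values made up for illustration):

```python
import numpy as np

# Toy 3-step episode; the last transition is terminal (nextStateWeight = 0).
rewards         = np.array([1.0, -0.1, 1.0])
nextStateWeight = np.array([1.0, 1.0, 0.0])
maxNextQ        = np.array([0.5, 0.7, 0.9])  # stand-in for max_a Q(s', a)
gamma = 0.95

# With the mask, the terminal step's target is just its reward.
targets = rewards + gamma * maxNextQ * nextStateWeight
print(targets)  # [1.475 0.565 1.   ]
```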

Core/CMazeEnviroment.py

Lines changed: 3 additions & 4 deletions
@@ -77,9 +77,8 @@ def _takeShot(self):
     return (data[x - d:x + d + 1, y - d:y + d + 1] for data in (maze, fog, moves))

   def minimap(self):
-    #maze, fog, moves = self._takeShot()
-    maze, fog, moves = self.maze, self.fog, self.moves
-    return (maze * fog, moves)
+    maze, fog, moves = self._takeShot()
+    return ((maze * fog) - (1 - fog), moves)

   @property
   def state(self):
@@ -120,7 +119,7 @@ def invalidActions(self):

   def state2input(self):
     maze, moves = self.minimap()
-    state = np.dstack((maze, ))
+    state = np.dstack((maze, moves))
     return state

   @property
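
Note: the reworked `minimap` distinguishes unexplored cells from revealed ones by assigning −1 to anything still under fog, and `state2input` now stacks the `moves` history as a second input channel. A toy illustration of the fog encoding (arrays are invented; only the fog semantics follow from the formula):

```python
import numpy as np

maze = np.array([[1., 0.],
                 [0., 1.]])  # toy cell values
fog  = np.array([[1., 1.],
                 [0., 1.]])  # 1 = cell already revealed

visible = (maze * fog) - (1 - fog)
print(visible)
# [[ 1.  0.]
#  [-1.  1.]]  -> the unseen cell reads -1, distinct from a revealed 0-cell
```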

model.py

Lines changed: 18 additions & 24 deletions
@@ -1,5 +1,6 @@
 import tensorflow.keras as keras
 import tensorflow.keras.layers as layers
+import tensorflow as tf

 def convBlock(prev, sz, filters):
   conv_1 = layers.Convolution2D(filters, (sz, sz), padding="same", activation="relu")(prev)
@@ -14,28 +15,21 @@ def createModel(shape):
   res = convBlock(res, 3, filters=32)

   res = layers.Flatten()(res)
-
-  res = layers.Dense(16 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(16 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(16 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(8 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(8 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(8 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(4 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(4 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)
-  res = layers.Dense(4 ** 2, activation='relu')(res)
-  res = layers.Dropout(.2)(res)

-  res = layers.Dense(4, activation='linear')(res)
-  return keras.Model(
-    inputs=inputs,
-    outputs=res
-  )
+  # dueling dqn
+  valueBranch = layers.Dense(32, activation='relu')(res)
+  valueBranch = layers.Dense(32, activation='relu')(valueBranch)
+  valueBranch = layers.Dense(32, activation='relu')(valueBranch)
+  valueBranch = layers.Dense(1, activation='linear')(valueBranch)
+
+  actionsBranch = layers.Dense(128, activation='relu')(res)
+  actionsBranch = layers.Dense(64, activation='relu')(actionsBranch)
+  actionsBranch = layers.Dense(64, activation='relu')(actionsBranch)
+  actionsBranch = layers.Dense(64, activation='relu')(actionsBranch)
+  actionsBranch = layers.Dense(4, activation='linear')(actionsBranch)
+
+  res = layers.Lambda(
+    lambda x: x[1] + (x[0] - tf.reduce_mean(x[0], axis=-1, keepdims=True))
+  )([actionsBranch, valueBranch])
+
+  return keras.Model(inputs=inputs, outputs=res)
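
Note: the Lambda layer is the standard dueling-DQN aggregation, Q(s, a) = V(s) + (A(s, a) − mean_a A(s, a)); centering the advantages keeps the value/advantage split identifiable, since a constant could otherwise be shifted between the two branches without changing Q. A self-contained sketch of the same head (the feature size 128 is illustrative):

```python
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

features = keras.Input(shape=(128,))  # stand-in for the flattened conv features

value = layers.Dense(32, activation='relu')(features)
value = layers.Dense(1, activation='linear')(value)          # V(s): scalar per state

advantage = layers.Dense(64, activation='relu')(features)
advantage = layers.Dense(4, activation='linear')(advantage)  # A(s, a): one per action

# Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); broadcasting adds the
# (batch, 1) value to the mean-centered (batch, 4) advantages.
q = layers.Lambda(
  lambda x: x[1] + (x[0] - tf.reduce_mean(x[0], axis=-1, keepdims=True))
)([advantage, value])

keras.Model(inputs=features, outputs=q).summary()
```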

train.py

Lines changed: 24 additions & 25 deletions
@@ -39,83 +39,82 @@ def emulate(env, model, exploreRate, exploreDecay, steps, stopOnInvalid=False):
     probe = model.predict(np.array([state]))[0]
     if not stopOnInvalid:
       for i in env.invalidActions():
-        probe[i] = -1
+        probe[i] = -float('inf')
     act = np.argmax(probe)

     if stopOnInvalid and not (act in valid):
-      episodeReplay.append([state, act, -1, env.state2input()])
+      episodeReplay.append([state, act, -10, env.state2input()])
       break

     prevScore = env.score
     env.apply(MAZE_ACTIONS[act])
-    normedScore = 1 if 0 < (env.score - prevScore) else -.1
+    normedScore = 1 if 0 < (env.score - prevScore) else -0.1
     episodeReplay.append([state, act, normedScore, env.state2input()])

     done = env.done
-    exploreRate = max((.01, exploreRate * exploreDecay))
+    exploreRate = max((.001, exploreRate * exploreDecay))
   return episodeReplay

 if __name__ == "__main__":
-  sz = 32
+  sz = 64
   env = CMazeEnviroment(
     maze=(0.8 < np.random.rand(sz, sz)).astype(np.float32),
     pos=(0, 0),
     FOV=3,
     minimapSize=8
   )
-  memory = CMazeExperience(maxSize=100)
+  memory = CMazeExperience(maxSize=1000)
   done = False
-  batch_size = 64
-  playSteps = 64
+  batch_size = 256
+  playSteps = 96

-  bestModelScore = 0
+  bestModelScore = -float('inf')
   model = createModel(shape=env.input_size)
   model.compile(
     optimizer=Adam(lr=1e-3),
     loss='mean_squared_error'
   )
-  # model.load_weights('model.h5')
+  #model.load_weights('weights/best.h5')

   targetModel = createModel(shape=env.input_size)
-  np.set_printoptions(precision=3)
   # collect data
-  while len(memory) < 50:
+  while len(memory) < 100:
     env.respawn()
     episodeReplay = emulate(
       env, model,
-      exploreRate=0.9,
-      exploreDecay=0.9,
+      exploreRate=1,
+      exploreDecay=1,
       steps=playSteps,
       stopOnInvalid=False
     )
     #################
     if 1 < len(episodeReplay):
       memory.addEpisode(episodeReplay)
       print(len(memory), env.score)
-  memory.update()

-  train_episodes = 500
-  test_episodes = 10
-  exploreRate = 1
-  exploreDecayPerEpoch = .9
-  exploreDecay = .9
+  train_episodes = 100
+  test_episodes = 20
+  exploreRate = .5
+  exploreDecayPerEpoch = .95
+  exploreDecay = .95
   for epoch in range(5000):
     print('Epoch %d' % epoch)
     # train
     targetModel.set_weights(model.get_weights())
     lossSum = 0
     for n in range(train_episodes):
       states, actions, rewards, nextStates, nextReward = memory.take_batch(batch_size)
-      targets = targetModel.predict(nextStates)
-      targets[np.arange(len(targets)), actions] = rewards + np.max(targets, axis=1) * .9 * nextReward
+      nextScores = targetModel.predict(nextStates)
+      targets = targetModel.predict(states)
+      targets[np.arange(len(targets)), actions] = rewards + np.max(nextScores, axis=1) * .95 * nextReward

       lossSum += model.fit(
         states, targets,
         epochs=1,
         verbose=0
       ).history['loss'][0]
+
     print('Avg. train loss: %.4f' % (lossSum / train_episodes))
-    print(targets[0])

     # test
     print('Epoch %d testing' % epoch)
@@ -141,6 +140,6 @@ def emulate(env, model, exploreRate, exploreDecay, steps, stopOnInvalid=False):
       if bestModelScore < scoreSum:
         bestModelScore = scoreSum
         print('save best model')
-        model.save_weights('model.h5')
-        model.save_weights('latest.h5')
+        model.save_weights('weights/best.h5')
+        model.save_weights('weights/latest.h5')
       exploreRate *= exploreDecayPerEpoch
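
Note: the old loop computed `targets = targetModel.predict(nextStates)` and fit the model on `states` against those next-state Q-values, so every non-taken action was pulled toward the wrong state's predictions. The fix predicts Q-values for the current states and overwrites only the taken action's entry with the Bellman target from the frozen target network. A numeric sketch of the corrected update (all values invented):

```python
import numpy as np

actions    = np.array([2, 0])
rewards    = np.array([1.0, -0.1])
nextReward = np.array([1.0, 0.0])  # 0 disables bootstrapping at episode end

nextScores = np.array([[0.1, 0.4, 0.2, 0.0],   # stand-in for targetModel.predict(nextStates)
                       [0.3, 0.1, 0.0, 0.2]])
targets    = np.array([[0.5, 0.2, 0.1, 0.0],   # stand-in for targetModel.predict(states)
                       [0.0, 0.4, 0.3, 0.1]])

# Only the taken action's entry becomes the Bellman target; the remaining
# entries keep the target network's predictions, so the MSE loss barely
# disturbs the untaken actions.
targets[np.arange(len(targets)), actions] = rewards + np.max(nextScores, axis=1) * .95 * nextReward
print(targets[0, 2])  # 1.0 + 0.95 * 0.4 = 1.38
print(targets[1, 0])  # -0.1 (terminal transition: no bootstrap term)
```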

view_maze.py

Lines changed: 42 additions & 16 deletions
@@ -1,18 +1,13 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import sys
-import os
 import tensorflow as tf
+import os

-if 'COLAB_GPU' in os.environ:
-  # fix resolve modules
-  from os.path import dirname
-  sys.path.append(dirname(dirname(dirname(__file__))))
-else: # local GPU
-  gpus = tf.config.experimental.list_physical_devices('GPU')
-  tf.config.experimental.set_virtual_device_configuration(
-    gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
-  )
+# limit GPU usage
+gpus = tf.config.experimental.list_physical_devices('GPU')
+tf.config.experimental.set_virtual_device_configuration(
+  gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
+)

 from Core.CMazeEnviroment import CMazeEnviroment, MazeActions
 import numpy as np
@@ -44,6 +39,7 @@ class Colors:

 class App:
   MODES = ['manual', 'random', 'agent']
+  NETWORKS = ['best', 'latest']

   def __init__(self):
     self._running = True
@@ -52,6 +48,8 @@ def __init__(self):
     self._mode = 'manual'
     self._paused = True
     self._speed = 20
+    self._usedNetwork = self.NETWORKS[0]
+    return

   def _createMaze(self):
     self._maze = createMaze()
@@ -65,7 +63,18 @@ def on_init(self):
     pygame.display.set_caption('Deep maze')
     self._font = pygame.font.Font(pygame.font.get_default_font(), 16)
     self._running = True
-
+
+  def _createNewAgent(self):
+    filename = 'weights/%s.h5' % self._usedNetwork
+    if not os.path.exists(filename):
+      self._usedNetwork = self.NETWORKS[0]
+      filename = 'weights/%s.h5' % self._usedNetwork
+
+    self._agent = createModel(shape=self._maze.input_size)
+    self._agent.load_weights(filename)
+    self._paused = True
+    return
+
   def on_event(self, event):
     if event.type == G.QUIT:
       self._running = False
@@ -77,12 +86,21 @@ def on_event(self, event):
       self._paused = True

       if 'agent' == self._mode:
-        self._agent = createModel(shape=self._maze.input_size)
-        self._agent.load_weights('model.h5')
+        self._createNewAgent()

     if G.K_SPACE == event.key:
       self._paused = not self._paused

+    if 'agent' == self._mode:
+      if G.K_r == event.key:
+        self._createMaze()
+      if G.K_n == event.key:
+        self._createNewAgent()
+      if G.K_t == event.key:
+        network = next((i for i, x in enumerate(self.NETWORKS) if x == self._usedNetwork))
+        self._usedNetwork = self.NETWORKS[(network + 1) % len(self.NETWORKS)]
+        self._createNewAgent()
+
     if G.K_ESCAPE == event.key:
       self._running = False

@@ -121,7 +139,7 @@ def on_loop(self):
     if 'agent' == self._mode:
       probe = self._agent.predict(np.array([self._maze.state2input()]))[0]
       for i in self._maze.invalidActions():
-        probe[i] = -1
+        probe[i] = -float('inf')
       pred = np.argmax(probe)

       act = list(MazeActions)[pred]
@@ -196,12 +214,20 @@ def _renderInfo(self):
         False, Colors.BLUE
       ), (655, 35)
     )
+
+    if 'agent' == self._mode:
+      self._display_surf.blit(
+        self._font.render(
+          'Network: %s' % (self._usedNetwork),
+          False, Colors.BLUE
+        ), (655, 55)
+      )
     return

   def on_render(self):
     self._display_surf.fill(Colors.SILVER)
     self._renderMaze()
-    # self._renderMazeMinimap()
+    self._renderMazeMinimap()
     self._renderInfo()
     pygame.display.flip()
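
Note: both train.py and the viewer now mask invalid actions with -inf instead of -1 before taking argmax; with -1, a blocked action could still win whenever every legal Q-value was below -1. A short demonstration (toy Q-values):

```python
import numpy as np

probe = np.array([-0.5, -2.0, -3.0, -4.0])  # all Q-values negative (toy example)
invalid = [0]                                # action 0 is blocked

old = probe.copy()
old[invalid] = -1
print(np.argmax(old))  # 0 -> the blocked action still wins

new = probe.copy()
new[invalid] = -float('inf')
print(np.argmax(new))  # 1 -> the blocked action can never be selected
```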

weights/best.h5

5.85 MB
Binary file not shown.

weights/latest.h5

5.85 MB
Binary file not shown.
