
Commit 3a2d4c4

tried to use RL (failed)
1 parent 4389744 commit 3a2d4c4

5 files changed: +384 −19 lines


CMazeExperience.py

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
import random
import numpy as np
import math

class CMazeExperience:
  def __init__(self, maxSize):
    self.maxSize = maxSize
    self.sizeLimit = (maxSize * 1.1)
    self.episodes = []
    self.gamma = 0.5
    self.minScore = -math.inf

  def addEpisode(self, replay):
    score = sum(x[2] for x in replay)
    if score < self.minScore: return

    # for i in range(len(replay)):
    #   state, act, score, nextState = replay[i]
    #   gamma = self.gamma
    #   for j in range(i + 1, len(replay)):
    #     score += gamma * replay[j][2]
    #     gamma *= self.gamma
    self.episodes.append((replay, score))

    if self.sizeLimit < len(self.episodes):
      self.update()
    return

  def update(self):
    self.episodes = list(
      sorted(self.episodes, key=lambda x: x[1], reverse=True)
    )[:self.maxSize]
    self.minScore = self.episodes[-1][1]
    print('Min score: %.6f' % self.minScore)

  def __len__(self):
    return len(self.episodes)

  def take_batch(self, batch_size):
    batch = []
    weights = [x[1] for x in self.episodes]
    while len(batch) < batch_size:
      episode, _ = random.choices(
        self.episodes,
        weights=weights,
        k=1
      )[0]

      minibatchIndexes = set(random.choices(
        np.arange(len(episode)),
        weights=[abs(x[2]) for x in episode],
        k=min((5, batch_size - len(batch), len(episode)))
      ))

      for ind in minibatchIndexes:
        state, act, score, nextState = episode[ind]
        nextStateWeight = 1 if ind < len(episode) - 1 else 0
        batch.append((state, act, score, nextState, nextStateWeight))

    return (
      np.array([x[0] for x in batch]),
      np.array([x[1] for x in batch]),
      np.array([x[2] for x in batch]),
      np.array([x[3] for x in batch]),
      np.array([x[4] for x in batch]),
    )
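
Review note: the buffer keeps whole episodes ranked by total reward; take_batch first samples an episode (weighted by its score), then transitions inside it (weighted by |reward|), and returns five aligned arrays. A minimal usage sketch follows; the state shapes, actions, and rewards in it are invented for illustration and are not part of the commit:

# Hypothetical usage sketch for CMazeExperience (toy data).
import numpy as np
from CMazeExperience import CMazeExperience

memory = CMazeExperience(maxSize=100)
# A fake 3-step episode of (state, action, reward, nextState) tuples;
# the 5x5x1 state shape is an arbitrary stand-in.
episode = [
  (np.zeros((5, 5, 1)), 0, 1.0, np.zeros((5, 5, 1))),
  (np.zeros((5, 5, 1)), 2, 0.5, np.zeros((5, 5, 1))),
  (np.zeros((5, 5, 1)), 1, 1.0, np.zeros((5, 5, 1))),
]
memory.addEpisode(episode)  # kept: its total reward clears minScore (initially -inf)
states, actions, rewards, nextStates, nextStateWeight = memory.take_batch(3)
print(states.shape, nextStateWeight)  # nextStateWeight is 0 for episode-final steps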

Core/CMazeEnviroment.py

Lines changed: 57 additions & 13 deletions
@@ -7,10 +7,14 @@ class MazeActions(Enum):
   RIGHT = (1, 0)
   UP = (0, -1)
   DOWN = (0, 1)
+
+MAZE_ACTIONS_AS_INT = { x: i for i, x in enumerate(MazeActions) }
+MAZE_ACTIONS = [x for x in MazeActions]
 
 class CMazeEnviroment:
-  def __init__(self, maze, pos, FOV):
+  def __init__(self, maze, pos, FOV, minimapSize):
     self.maze = np.pad(np.array(maze), FOV, constant_values=(1,))
+    self.minimapSize = minimapSize
     self._fov = FOV
 
     x, y = np.array(pos) + FOV
@@ -20,7 +24,8 @@ def __init__(self, maze, pos, FOV):
   def spawnAt(self, x, y):
     self.pos = np.array([y, x])
     self.fog = np.zeros_like(self.maze)
-    self._updateFog()
+    self.moves = np.zeros_like(self.maze)
+    self._update()
     return
 
   def respawn(self):
@@ -33,17 +38,17 @@ def respawn(self):
         break
     return
 
-  def _updateFog(self):
+  def _update(self):
     y, x = self.pos
-    self.fog[
-      x - self._fov:x + self._fov + 1,
-      y - self._fov:y + self._fov + 1
-    ] = 1
+    d = self._fov
+    self.fog[x - d:x + d + 1, y - d:y + d + 1] = 1
+    self.moves[x, y] = 1
     return
 
   def apply(self, action):
     self.pos += action.value
-    self._updateFog()
+    self.lastAction = MAZE_ACTIONS_AS_INT[action]
+    self._update()
     return
 
   def vision(self):
@@ -52,15 +57,38 @@ def vision(self):
       x - self._fov:x + self._fov + 1,
       y - self._fov:y + self._fov + 1
     ]
+
+  def _takeShot(self):
+    maze, fog, moves = self.maze, self.fog, self.moves
+    y, x = self.pos
+    h, w = self.maze.shape
+
+    isXAxisOk = (self.minimapSize < x) and (x < (w - self.minimapSize))
+    isYAxisOk = (self.minimapSize < y) and (y < (h - self.minimapSize))
+    if not (isXAxisOk and isYAxisOk):
+      x += self.minimapSize
+      y += self.minimapSize
+      maze = np.pad(maze, self.minimapSize, constant_values=(1,))
+      fog, moves = (
+        np.pad(data, self.minimapSize, constant_values=(0,)) for data in (fog, moves)
+      )
+
+    d = self.minimapSize
+    return (data[x - d:x + d + 1, y - d:y + d + 1] for data in (maze, fog, moves))
+
+  def minimap(self):
+    #maze, fog, moves = self._takeShot()
+    maze, fog, moves = self.maze, self.fog, self.moves
+    return (maze * fog, moves)
 
   @property
   def state(self):
-    return ((self.vision(), self.fog, ), self.score, self.done)
+    return ((self.minimap(), ), self.score, self.done)
 
   @property
   def done(self):
-    y, x = self._pos
-    return 1 < self.maze[x, y]
+    y, x = self.pos
+    return 0 < self.maze[x, y]
 
   @property
   def score(self):
@@ -70,15 +98,31 @@ def score(self):
 
   def copy(self):
     # dirty copy
-    res = CMazeEnviroment(self.maze, self.pos, self._fov)
+    res = CMazeEnviroment(self.maze, self.pos, self._fov, self.minimapSize)
     res.maze = self.maze.copy()
    res.fog = self.fog.copy()
     res.pos = self.pos.copy()
+    res.moves = self.moves.copy()
     return res
 
   def isPossible(self, action):
     y, x = self.pos + action.value
     return self.maze[x, y] <= 0
 
   def validActions(self):
-    return [ act for act in MazeActions if self.isPossible(act) ]
+    return [ act for act in MazeActions if self.isPossible(act) ]
+
+  def validActionsIndex(self):
+    return [ i for i, act in enumerate(MazeActions) if self.isPossible(act) ]
+
+  def invalidActions(self):
+    return [ i for i, act in enumerate(MazeActions) if not self.isPossible(act) ]
+
+  def state2input(self):
+    maze, moves = self.minimap()
+    state = np.dstack((maze, ))
+    return state
+
+  @property
+  def input_size(self):
+    return self.state2input().shape
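
Review note: the commit replaces the raw fog-of-war state with a minimap pair (maze * fog, moves); _takeShot is written but commented out in minimap, so for now the "minimap" is the whole padded maze, and state2input() stacks only the maze channel, leaving moves unused. A quick smoke test of the new API; the 16x16 random maze is an assumption mirroring train.py:

# Hypothetical smoke test for the new minimap-based state.
import numpy as np
from Core.CMazeEnviroment import CMazeEnviroment, MAZE_ACTIONS

env = CMazeEnviroment(
  maze=(0.8 < np.random.rand(16, 16)).astype(np.float32),
  pos=(0, 0),
  FOV=3,
  minimapSize=8,
)
env.respawn()
print(env.input_size)          # (22, 22, 1) here: 16 + 2*FOV per side, one channel
print(env.validActionsIndex()) # indices into MAZE_ACTIONS that do not hit a wall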

model.py

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

def convBlock(prev, sz, filters):
  conv_1 = layers.Convolution2D(filters, (sz, sz), padding="same", activation="relu")(prev)
  conv_1 = layers.Dropout(0.1)(conv_1)
  conv_1 = layers.BatchNormalization()(conv_1)
  return conv_1

def createModel(shape):
  inputs = res = layers.Input(shape=shape)
  res = convBlock(res, 3, filters=32)
  res = convBlock(res, 3, filters=32)
  res = convBlock(res, 3, filters=32)

  res = layers.Flatten()(res)

  res = layers.Dense(16 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(16 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(16 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(8 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(8 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(8 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(4 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(4 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)
  res = layers.Dense(4 ** 2, activation='relu')(res)
  res = layers.Dropout(.2)(res)

  res = layers.Dense(4, activation='linear')(res)
  return keras.Model(
    inputs=inputs,
    outputs=res
  )
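
Review note: three 3x3 conv blocks feed a stack of dense layers narrowing from 256 to 16 units, ending in a linear Dense(4): one Q-value per maze action. A shape sanity check, assuming the (22, 22, 1) input from the environment sketch above:

# Hypothetical shape check; the input size is an assumption, not fixed by model.py.
model = createModel(shape=(22, 22, 1))
model.summary()  # last layer: Dense(4), one linear Q-value per entry of MAZE_ACTIONS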

train.py

Lines changed: 146 additions & 0 deletions
@@ -0,0 +1,146 @@
# -*- coding: utf-8 -*-
import sys
import os
import tensorflow as tf
from CMazeExperience import CMazeExperience

if 'COLAB_GPU' in os.environ:
  # fix resolve modules
  from os.path import dirname
  sys.path.append(dirname(dirname(dirname(__file__))))
else: # local GPU
  gpus = tf.config.experimental.list_physical_devices('GPU')
  tf.config.experimental.set_virtual_device_configuration(
    gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
  )

import random
import numpy as np

from keras.optimizers import Adam

from Core.CMazeEnviroment import CMazeEnviroment, MAZE_ACTIONS
from model import createModel

def emulate(env, model, exploreRate, exploreDecay, steps, stopOnInvalid=False):
  episodeReplay = []
  done = False
  N = 0
  while (N < steps) and not done:
    N += 1
    act = None
    valid = env.validActionsIndex()
    if not valid: break

    state = env.state2input()
    if random.random() < exploreRate:
      act = random.choice(valid)
    else:
      probe = model.predict(np.array([state]))[0]
      if not stopOnInvalid:
        for i in env.invalidActions():
          probe[i] = -1
      act = np.argmax(probe)

    if stopOnInvalid and not (act in valid):
      episodeReplay.append([state, act, -1, env.state2input()])
      break

    prevScore = env.score
    env.apply(MAZE_ACTIONS[act])
    normedScore = 1 if 0 < (env.score - prevScore) else -.1
    episodeReplay.append([state, act, normedScore, env.state2input()])

    done = env.done
    exploreRate = max((.01, exploreRate * exploreDecay))
  return episodeReplay

if __name__ == "__main__":
  sz = 32
  env = CMazeEnviroment(
    maze=(0.8 < np.random.rand(sz, sz)).astype(np.float32),
    pos=(0, 0),
    FOV=3,
    minimapSize=8
  )
  memory = CMazeExperience(maxSize=100)
  done = False
  batch_size = 64
  playSteps = 64

  bestModelScore = 0
  model = createModel(shape=env.input_size)
  model.compile(
    optimizer=Adam(lr=1e-3),
    loss='mean_squared_error'
  )
  # model.load_weights('model.h5')

  targetModel = createModel(shape=env.input_size)
  np.set_printoptions(precision=3)
  # collect data
  while len(memory) < 50:
    env.respawn()
    episodeReplay = emulate(
      env, model,
      exploreRate=0.9,
      exploreDecay=0.9,
      steps=playSteps,
      stopOnInvalid=False
    )
    #################
    if 1 < len(episodeReplay):
      memory.addEpisode(episodeReplay)
      print(len(memory), env.score)
  memory.update()

  train_episodes = 500
  test_episodes = 10
  exploreRate = 1
  exploreDecayPerEpoch = .9
  exploreDecay = .9
  for epoch in range(5000):
    print('Epoch %d' % epoch)
    # train
    targetModel.set_weights(model.get_weights())
    lossSum = 0
    for n in range(train_episodes):
      states, actions, rewards, nextStates, nextReward = memory.take_batch(batch_size)
      targets = targetModel.predict(nextStates)
      targets[np.arange(len(targets)), actions] = rewards + np.max(targets, axis=1) * .9 * nextReward

      lossSum += model.fit(
        states, targets,
        epochs=1,
        verbose=0
      ).history['loss'][0]
    print('Avg. train loss: %.4f' % (lossSum / train_episodes))
    print(targets[0])

    # test
    print('Epoch %d testing' % epoch)
    bestScore = scoreSum = movesSum = 0
    n = 0
    while n < test_episodes:
      env.respawn()
      episodeReplay = emulate(
        env, model,
        exploreRate=exploreRate,
        exploreDecay=exploreDecay,
        steps=playSteps*2,
        stopOnInvalid=True
      )
      if 1 < len(episodeReplay):
        memory.addEpisode(episodeReplay)
        n += 1
        bestScore = max((bestScore, env.score))
        scoreSum += env.score
        movesSum += len(episodeReplay)
    #################
    print('Best score: %.3f, avg. score: %.3f, avg. moves: %.1f' % (bestScore, scoreSum / n, movesSum / n))
    if bestModelScore < scoreSum:
      bestModelScore = scoreSum
      print('save best model')
      model.save_weights('model.h5')
    model.save_weights('latest.h5')
    exploreRate *= exploreDecayPerEpoch
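
Review note: the targets line implements the Bellman update Q(s, a) = r + 0.9 * max_a' Q_target(s', a'), with nextReward acting as a terminal mask: CMazeExperience sets it to 0 for the last transition of an episode, which drops the bootstrap term at episode ends. One caveat: the target rows are seeded from targetModel.predict(nextStates) rather than predictions for states, so the MSE on the three non-updated actions pulls Q(s, ·) toward Q(s', ·); a textbook DQN would seed targets with the network's predictions for states. A standalone sketch of the computation as committed, with toy numbers:

# Standalone sketch of the target update above (toy numbers, gamma = 0.9).
import numpy as np

q_next   = np.array([[0.2, 0.5], [0.1, 0.0]])  # stands in for targetModel.predict(nextStates)
actions  = np.array([1, 0])
rewards  = np.array([1.0, -0.1])
terminal = np.array([1, 0])                    # nextReward: 0 masks the terminal bootstrap

targets = q_next.copy()
targets[np.arange(len(targets)), actions] = rewards + 0.9 * q_next.max(axis=1) * terminal
print(targets)  # [[ 0.2   1.45], [-0.1   0.  ]]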
