/
env.py
109 lines (98 loc) · 3.1 KB
/
env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import random
import numpy as np
import copy
from config import NCOL,NROW,NFOOD,EPISODE_MAXLEN
from util import dirs,addModVec
class Reward:
    """Reward constants handed back by ``Env.step``."""

    # The head moved onto a food cell.
    FOOD = 0.25
    # The head moved onto an empty cell (also used when the episode times out).
    IDLE = 0
    # The head moved onto a cell occupied by the snake.
    COLLIDE = -1
def stateTransform(state, flipy, flipx, deltay, deltax):
    """Return a shifted (and optionally mirrored) copy of ``state``.

    Every ``(y, x)`` cell in ``state['snake']`` and ``state['foods']`` is
    translated by ``(deltay, deltax)`` modulo the grid size (``NROW`` rows,
    ``NCOL`` columns). When ``flipy`` / ``flipx`` is truthy the corresponding
    coordinate is additionally mirrored (``size - 1 - coord``) after the
    shift. All other entries of ``state`` are deep-copied unchanged and the
    input ``state`` is not modified.
    """

    def _remap(coord, delta, size, flip):
        # Translate with wrap-around first, then mirror if requested.
        shifted = (coord + delta + size) % size
        if flip:
            return (size - 1 - shifted) % size
        return shifted

    transformed = copy.deepcopy(state)
    for key in ('snake', 'foods'):
        transformed[key] = [
            (_remap(y, deltay, NROW, flipy), _remap(x, deltax, NCOL, flipx))
            for y, x in state[key]
        ]
    return transformed
def actTransform(act, flipy, flipx):
    """Map an action index through the given axis flips.

    Odd actions (1, 3) are replaced by their opposite ``(act + 2) % 4`` when
    ``flipx`` is truthy; even actions (0, 2) are replaced by their opposite
    when ``flipy`` is truthy. Otherwise ``act`` is returned unchanged.
    """
    # Pick the flip flag that governs this action's parity.
    relevant_flip = flipx if act % 2 else flipy
    if relevant_flip:
        return (act + 2) % 4
    return act
class Env:
    """Snake game environment on an ``NROW`` x ``NCOL`` grid.

    The snake is stored tail-first: ``snake[0]`` is the tail and
    ``snake[-1]`` is the head. Cells not occupied by the snake or by food are
    kept in ``empty_cells``. Observations returned by ``reset``/``step`` are
    produced by ``makeState``.

    Args:
        flipTransform: when truthy, every observation is randomly mirrored
            along each axis (see ``makeState``).
    """

    def __init__(self, flipTransform):
        self.flipTransform = flipTransform
        self.state = None
        self.score = 0

    def useCell(self, cell):
        """Mark ``cell`` as occupied (remove it from ``empty_cells``) and return it.

        Raises:
            ValueError: if ``cell`` is not currently in ``empty_cells``.
        """
        self.empty_cells.remove(cell)
        return cell

    def releaseCell(self, cell):
        """Mark ``cell`` as free (append it to ``empty_cells``) and return it."""
        self.empty_cells.append(cell)
        return cell

    def getRandomEmptyCell(self):
        """Return one cell drawn at random from ``empty_cells``.

        The cell is not removed; callers pair this with ``useCell``.
        """
        return random.sample(self.empty_cells, 1)[0]

    def _fillFoods(self):
        """Place food on random empty cells until ``NFOOD`` foods exist or the grid is full."""
        while self.empty_cells and len(self.foods) < NFOOD:
            self.foods.append(self.useCell(self.getRandomEmptyCell()))

    def reset(self, obs=None):
        """Start a new episode and return the first observation.

        Args:
            obs: optional dict with keys ``'snake'``, ``'foods'``, ``'time'``
                and ``'done'`` to resume from. A falsy value starts from an
                empty board. The lists in ``obs`` are copied, so the caller's
                object is never mutated by the environment.

        Returns:
            The state dict produced by ``makeState``.
        """
        self.state = None
        self.score = 0
        self.empty_cells = [(y, x) for y in range(NROW) for x in range(NCOL)]
        if not obs:
            obs = {
                'snake': [],
                'foods': [],
                'time': 0,
                'done': False,
            }
        # Copy the lists (and normalise cells to tuples so they compare equal
        # to the entries of empty_cells) instead of aliasing the caller's data.
        self.snake = [tuple(cell) for cell in obs['snake']]
        self.foods = [tuple(cell) for cell in obs['foods']]
        self.time = obs['time']
        self.done = obs['done']

        for cell in self.snake:
            self.useCell(cell)
        # The snake needs at least two segments.
        while len(self.snake) < 2:
            self.snake.append(self.useCell(self.getRandomEmptyCell()))
        for cell in self.foods:
            self.useCell(cell)
        self._fillFoods()
        return self.makeState()

    def makeState(self):
        """Build, store in ``self.state`` and return the current observation.

        The observation is ``stateTransform`` applied to the raw state with
        the offset that moves the head to the grid centre
        ``(NROW // 2, NCOL // 2)``; when ``flipTransform`` is truthy each axis
        is additionally mirrored with probability 1/2.
        """
        raw = {
            'snake': self.snake,
            'foods': self.foods,
            'time': self.time,
            'done': self.done,
        }
        cy, cx = NROW // 2, NCOL // 2
        hy, hx = self.snake[-1]
        # NOTE(review): the sampled flips are not stored anywhere, so a caller
        # cannot map an action chosen on a flipped observation back with
        # actTransform -- confirm this is intended.
        fy = np.random.randint(0, 2) if self.flipTransform else 0
        fx = np.random.randint(0, 2) if self.flipTransform else 0
        self.state = stateTransform(raw, fy, fx, cy - hy, cx - hx)
        return self.state

    def getCellType(self, pos):
        """Classify ``pos`` as ``"SNAKE"``, ``"FOOD"`` or ``"EMPTY"``.

        Raises:
            RuntimeError: if ``pos`` is in none of ``snake``, ``foods`` or
                ``empty_cells``.
        """
        if pos in self.snake:
            return "SNAKE"
        if pos in self.foods:
            return "FOOD"
        if pos in self.empty_cells:
            return "EMPTY"
        # Raising a plain string is itself a TypeError in Python 3.
        raise RuntimeError("Cell type couldn't be determined")

    def step(self, action):
        """Advance the game by one move.

        Args:
            action: index into ``dirs`` selecting the head's move.

        Returns:
            ``(state, reward)`` where ``state`` comes from ``makeState`` and
            ``reward`` is one of the ``Reward`` constants.

        Raises:
            RuntimeError: if the episode is already done. The check happens
                before any state (including ``time``) is modified.
        """
        if self.done:
            raise RuntimeError("Ended game cannot step")
        self.time += 1

        # Remove the tail first so that its cell counts as free for this move.
        tail = self.snake.pop(0)
        self.releaseCell(tail)

        # presumably addModVec adds the direction with wrap-around -- see util.
        npos = addModVec(self.snake[-1], dirs[action])
        npos_type = self.getCellType(npos)

        if npos_type == "FOOD":
            self.foods.remove(self.releaseCell(npos))
            self.snake.append(self.useCell(npos))
            # Eating grows the snake: put the tail back.
            self.snake.insert(0, self.useCell(tail))
            self._fillFoods()
            self.score += Reward.FOOD
            return (self.makeState(), Reward.FOOD)

        if npos_type == "SNAKE":
            # The terminal observation keeps the tail removed and the head
            # unmoved, exactly as before.
            self.done = True
            self.score += Reward.COLLIDE
            return (self.makeState(), Reward.COLLIDE)

        # Empty cell.
        if self.time == EPISODE_MAXLEN:
            # Timeout: the episode ends without moving the head.
            self.done = True
            return (self.makeState(), Reward.IDLE)
        self.snake.append(self.useCell(npos))
        self.score += Reward.IDLE
        return (self.makeState(), Reward.IDLE)