-
Notifications
You must be signed in to change notification settings - Fork 11
/
CliffWalking.kt
57 lines (53 loc) · 1.87 KB
/
CliffWalking.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
package lab.mars.rl.problem
import lab.mars.rl.model.impl.mdp.CNSetMDP
import lab.mars.rl.model.impl.mdp.IndexedMDP
import lab.mars.rl.model.impl.mdp.IndexedPossible
import lab.mars.rl.util.collection.cnsetOf
import lab.mars.rl.util.collection.emptyNSet
import lab.mars.rl.util.collection.fork
import lab.mars.rl.util.collection.map
import lab.mars.rl.util.dimension.x
/**
 * Cliff-walking grid world, built as an [IndexedMDP].
 *
 * States are addressed as `[x, y]` on a [world_width] by [world_height] grid:
 *  - the start state is `[0, 0]`;
 *  - the goal is `[world_width - 1, 0]` and has no actions;
 *  - the cells between them on row `y = 0` are the cliff and also have no actions.
 *
 * Every action is given a single outcome: moves are clamped to the grid, cost `-1.0`, and cost
 * `0.0` when they enter the goal. A move that would enter a cliff cell leads back to the start
 * state with reward `-100.0` instead.
 */
object CliffWalking {
    val world_height = 4
    val world_width = 12

    /** `(dx, dy)` offset for each action index; the order matches [desc_move]. */
    val move = arrayOf(
        intArrayOf(0, 1), // up
        intArrayOf(0, -1), // down
        intArrayOf(-1, 0), // left
        intArrayOf(1, 0) // right
    )

    /** Arrow glyph for each action index, in the same order as [move]. */
    val desc_move = arrayOf("↑", "↓", "←", "→")

    /**
     * Builds the cliff-walking MDP with `gamma = 1.0` and four actions per state.
     *
     * @return the configured MDP with its start-state supplier and every action's outcome set.
     */
    fun make(): IndexedMDP {
        val mdp = CNSetMDP(gamma = 1.0,
                           state_dim = world_width x world_height,
                           action_dim = 4)
        return mdp.apply {
            // Derive the goal column from the grid width instead of repeating the literal 11.
            val goal = states[world_width - 1, 0]
            goal.actions = emptyNSet()
            started = { states(0, 0).rand() }
            val startedState = states[0, 0]

            // Cliff: the bottom-row cells strictly between the start and the goal.
            val cliffColumns = 1 until world_width - 1
            for (column in cliffColumns)
                states[column, 0].actions = emptyNSet()

            // Default rule for every remaining (state, action) pair: clamp the move to the grid.
            for ((s, a) in states.fork { it.actions }) {
                val m = move[a[0]]
                val nextX = (s[0] + m[0]).coerceIn(0, world_width - 1)
                val nextY = (s[1] + m[1]).coerceIn(0, world_height - 1)
                val next = states[nextX, nextY]
                // NOTE(review): the trailing 1.0 is presumably the outcome probability - confirm
                // against IndexedPossible.
                a.possibles = cnsetOf(IndexedPossible(next, if (next === goal) 0.0 else -1.0, 1.0))
            }

            // Action 3 is "right": from the start it steps onto the cliff, so override the
            // default with a -100 transition back to the start.
            startedState.actions[3].possibles = cnsetOf(IndexedPossible(startedState, -100.0, 1.0))

            // Row y = 1 directly above the cliff: any move that lands on row 0 falls off and
            // returns to the start. These assignments replace the defaults set above.
            for ((s, a) in cliffColumns.map { states[it, 1] }.fork { it.actions }) {
                val m = move[a[0]]
                val targetY = s[1] + m[1]
                val next = if (targetY == 0) states[0, 0] else states[s[0] + m[0], targetY]
                a.possibles = cnsetOf(IndexedPossible(next, if (next === startedState) -100.0 else -1.0, 1.0))
            }
        }
    }
}