-
Notifications
You must be signed in to change notification settings - Fork 23
/
balancebot_env.py
105 lines (82 loc) · 3.46 KB
/
balancebot_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import math
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
import pybullet as p
import pybullet_data
class BalancebotEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second' : 50
}
def __init__(self, render=False):
self._observation = []
self.action_space = spaces.Discrete(9)
self.observation_space = spaces.Box(np.array([-math.pi, -math.pi, -5]),
np.array([math.pi, math.pi, 5])) # pitch, gyro, com.sp.
if (render):
self.physicsClient = p.connect(p.GUI)
else:
self.physicsClient = p.connect(p.DIRECT) # non-graphical version
p.setAdditionalSearchPath(pybullet_data.getDataPath()) # used by loadURDF
self._seed()
# paramId = p.addUserDebugParameter("My Param", 0, 100, 50)
def _seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def _step(self, action):
self._assign_throttle(action)
p.stepSimulation()
self._observation = self._compute_observation()
reward = self._compute_reward()
done = self._compute_done()
self._envStepCounter += 1
return np.array(self._observation), reward, done, {}
def _reset(self):
# reset is called once at initialization of simulation
self.vt = 0
self.vd = 0
self.maxV = 24.6 # 235RPM = 24,609142453 rad/sec
self._envStepCounter = 0
p.resetSimulation()
p.setGravity(0,0,-10) # m/s^2
p.setTimeStep(0.01) # sec
planeId = p.loadURDF("plane.urdf")
cubeStartPos = [0,0,0.001]
cubeStartOrientation = p.getQuaternionFromEuler([0,0,0])
path = os.path.abspath(os.path.dirname(__file__))
self.botId = p.loadURDF(os.path.join(path, "balancebot_simple.xml"),
cubeStartPos,
cubeStartOrientation)
# you *have* to compute and return the observation from reset()
self._observation = self._compute_observation()
return np.array(self._observation)
def _assign_throttle(self, action):
dv = 0.1
deltav = [-10.*dv,-5.*dv, -2.*dv, -0.1*dv, 0, 0.1*dv, 2.*dv,5.*dv, 10.*dv][action]
vt = clamp(self.vt + deltav, -self.maxV, self.maxV)
self.vt = vt
p.setJointMotorControl2(bodyUniqueId=self.botId,
jointIndex=0,
controlMode=p.VELOCITY_CONTROL,
targetVelocity=vt)
p.setJointMotorControl2(bodyUniqueId=self.botId,
jointIndex=1,
controlMode=p.VELOCITY_CONTROL,
targetVelocity=-vt)
def _compute_observation(self):
cubePos, cubeOrn = p.getBasePositionAndOrientation(self.botId)
cubeEuler = p.getEulerFromQuaternion(cubeOrn)
linear, angular = p.getBaseVelocity(self.botId)
return [cubeEuler[0],angular[0],self.vt]
def _compute_reward(self):
return 0.1 - abs(self.vt - self.vd) * 0.005
def _compute_done(self):
cubePos, _ = p.getBasePositionAndOrientation(self.botId)
return cubePos[2] < 0.15 or self._envStepCounter >= 1500
def _render(self, mode='human', close=False):
pass
def clamp(n, minn, maxn):
return max(min(maxn, n), minn)