In [1]:
import pybullet as p
import time
import numpy as np
import gym
from gym import spaces
import cv2

In [2]:
class Test_car(gym.Env):
    
    def __init__(self):
        print("init")
        super().__init__()
        self.episodes = 0
        self.max_steps = 30
        self.height = 64
        self.width = 64
        self.action_space = spaces.Discrete(4) #前後左右
        self.observation_space = spaces.Box(0, 255, [self.height, self.width, 3]) #Boxは連続値
        self.reward_range = [-1,1]
        '''pybullet側の初期設定'''
        p.connect(p.GUI)
        p.setAdditionalSearchPath("../ros_ws/src/test_car_description/urdf/")
        self.maxForce = 10
        self.reset()
        print("init_reset終了")
            
    def reset(self):
#         print("reset")
        print("====episode:"+str(self.episodes)+"=================")
        self.episodes += 1
        self.steps = 0
        targetX, targetY = np.random.permutation(np.arange(10))[0:2]
        self.targetPos = [targetX, targetY, 0]
        
        '''pybullet側'''
        #bulletの世界をリセット
        p.resetSimulation()
        #フィールドを表示
        p.setGravity(0,0,-10)
        self.planeId = p.loadURDF("plane100.urdf")
        
        #オブジェクトモデルを表示
        self.startPos = [0,0,0]
        self.startOrientation = p.getQuaternionFromEuler([0,0,0])
        self.car = p.loadURDF("test_car.urdf", self.startPos, self.startOrientation)
        
        # ターゲットを表示
        self.target = p.createCollisionShape(
            p.GEOM_CYLINDER, radius=0.2, height=2, collisionFramePosition=self.targetPos)
        p.createMultiBody(0, self.target)
        
#         print("reset end")
        return self.observation()
        
    def step(self, action):
        print("---step:"+str(self.steps)+"-------")
        self.steps += 1
        if action == 0:
            #前進
            p.setJointMotorControlArray(
                self.car, np.arange(p.getNumJoints(self.car))[1:], p.VELOCITY_CONTROL, 
                targetVelocities=[20,20,20,20],
                forces=np.ones(4)*self.maxForce)
        elif action == 1:
            #右
            p.setJointMotorControlArray(
                self.car, np.arange(p.getNumJoints(self.car))[1:], p.VELOCITY_CONTROL, 
                targetVelocities=[20,12,20,12],
                forces=np.ones(4)*self.maxForce)
        elif action == 2:
            #後退
            p.setJointMotorControlArray(
                self.car, np.arange(p.getNumJoints(self.car))[1:], p.VELOCITY_CONTROL, 
                targetVelocities=[-20,-20,-20,-20],
                forces=np.ones(4)*self.maxForce)
        elif action == 3:
            #左
            p.setJointMotorControlArray(
                self.car, np.arange(p.getNumJoints(self.car))[1:], p.VELOCITY_CONTROL, 
                targetVelocities=[12,20,12,20],
                forces=np.ones(4)*self.maxForce)
        
        for i in range(200):
            p.stepSimulation()
            time.sleep(1./240.)
        
        observation = self.observation()
        done = self.is_done()
        reward = self.reward()
#         print("step end")
        return observation, reward, done, {}
        
        
    def render(self, mode='rgb_array', close=False):
# #         print("render")
        if mode != "rgb_array":
            return np.array([])
        base_pos, orn = p.getBasePositionAndOrientation(self.car)
        cam_eye = np.array(base_pos) + [0.1,0,0.2]
        cam_target = np.array(base_pos) + [2,0,0.2]
        cam_upvec = [1,0,1]

        view_matrix = p.computeViewMatrix(
                cameraEyePosition=cam_eye,
                cameraTargetPosition=cam_target,
                cameraUpVector=cam_upvec)

        proj_matrix = p.computeProjectionMatrixFOV(
            fov=60, aspect=float(self.width)/self.height,
            nearVal=0.1, farVal=100.0)

        (_, _, rgb, _, mask) = p.getCameraImage(
            width=self.width, height=self.height, viewMatrix=view_matrix,
            projectionMatrix=proj_matrix, renderer=p.ER_BULLET_HARDWARE_OPENGL)

        rgb_array = np.array(rgb)
        rgb_array = rgb_array[:,:,:3]
        mask_array = np.array(mask)
        
#         print("render end")
        return rgb_array
    
    def close(self):
        pass

    def seed(self, seed=None):
        pass
    
    def observation(self):
#         print("observation")
        rgb_array = self.render()
        rgb_array = rgb_array / 255.0
        
        return rgb_array
    
    def green_detect(self, img):
        '''緑色のマスク'''
        # HSV色空間に変換
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        # 緑色のHSVの値域
        hsv_min = np.array([50, 100, 100])
        hsv_max = np.array([70, 255, 255])
        mask = cv2.inRange(hsv, hsv_min, hsv_max)
        return mask

    def calc_area(self, img):
        '''面積計算'''
        img = self.green_detect(img)
        pix_area = cv2.countNonZero(img)  # ピクセル数
        # パーセントを算出
        h, w = img.shape  # frameの面積
        per = round(100 * float(pix_area) / (w * h), 3)  # 0-100で規格化
        print('GREEN_AREA: ', per)
        return per

    def is_done(self):
        frame = self.render()
        self.area = self.calc_area(frame)
        if self.area >= 80:
            done = True
        elif self.steps > self.max_steps:
            done = True
        else:
            done = False
        return done

    def reward(self):
#         print("reward")
        if self.area >= 80:
            reward = 1
        elif self.steps > self.max_steps:
            reward = -1
        else:
            reward = 0
        print("reward: ", reward)
        return reward
        
'''        
    def is_done(self):
#         print("is_done")
        self.distance = p.getClosestPoints(
            bodyA=self.car, bodyB=self.target, distance=10, linkIndexA=0)[0][8]
        if self.distance <= 0:
            self.done = True
        elif self.steps > self.max_steps:
            self.done = True
        print("distance: ", self.distance)
        return self.done
'''

'        \n    def is_done(self):\n#         print("is_done")\n        self.distance = p.getClosestPoints(\n            bodyA=self.car, bodyB=self.target, distance=10, linkIndexA=0)[0][8]\n        if self.distance <= 0:\n            self.done = True\n        elif self.steps > self.max_steps:\n            self.done = True\n        print("distance: ", self.distance)\n        return self.done\n'

In [3]:
env = Test_car()

init
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.uint8'>. Please provide explicit dtype.[0m
init_reset終了


In [4]:
#これらとpybulletのウィンドウ表示(p.connect)を分けないとカーネルが死ぬ
from baselines import deepq
import datetime

In [5]:
def callback(lcl, glb):
    # stop training if reward exceeds 199
    total = sum(lcl['episode_rewards'][-101:-1]) / 100
    totalt = lcl['t']
    is_solved = totalt > 2000 and total >= 20
    return is_solved

cnn_to_mlp(convs, hiddens, dueling=False, layer_norm=False)
    - convs: [(int, int, int)]
        list of convolutional layers in form of
        (num_outputs, kernel_size, stride)
    - hiddens: [int]
        list of sizes of hidden layers

In [6]:
model = deepq.models.cnn_to_mlp([(512,5,1)], [256,64,4])

In [7]:
act = deepq.learn(
    env,
    q_func=model,
    lr=1e-2,
    max_timesteps=100000,
    buffer_size=50000,
    exploration_fraction=0.1,
    exploration_final_eps=0.02,
    print_freq=1,
    callback=callback
)
print("Saving model to test_car_model.pkl")
act.save("test_car_model.pkl")



error: Couldn't restore state.

In [10]:
p.disconnect()

In [11]:
env.observation_space.shape

(64, 64, 3)

(1, 320, 320, 3)

In [5]:
from keras.models import Sequential
from keras.layers import InputLayer, Dense, Reshape, Conv2D, Flatten, MaxPooling2D, BatchNormalization, Dropout
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

nb_actions = 4

Using TensorFlow backend.


In [14]:
model = Sequential()

model.add(Reshape((64, 64, 3), input_shape=(1,) + env.observation_space.shape))
model.add(Conv2D(8, kernel_size=(5,5), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(16, kernel_size=(5,5), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(32, kernel_size=(5,5), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_actions, activation='linear'))

print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_2 (Reshape)          (None, 64, 64, 3)         0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 60, 60, 8)         608       
_________________________________________________________________
batch_normalization_10 (Batc (None, 60, 60, 8)         32        
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 30, 30, 8)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 26, 26, 16)        3216      
_________________________________________________________________
batch_normalization_11 (Batc (None, 26, 26, 16)        64        
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 13, 13, 16)        0         
__________

In [15]:
memory = SequentialMemory(limit=100000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [16]:
dqn.fit(env, nb_steps=100000, visualize=True, verbose=1)

Training for 100000 steps ...
Interval 1 (0 steps performed)
---step:0-------
GREEN_AREA:  0.0
reward:  0
    1/10000 [..............................] - ETA: 5:10:41 - reward: 0.0000e+00---step:1-------
GREEN_AREA:  0.0
reward:  0
    2/10000 [..............................] - ETA: 3:59:16 - reward: 0.0000e+00---step:2-------
GREEN_AREA:  0.0
reward:  0
    3/10000 [..............................] - ETA: 3:33:31 - reward: 0.0000e+00---step:3-------
GREEN_AREA:  0.0
reward:  0
    4/10000 [..............................] - ETA: 3:21:35 - reward: 0.0000e+00---step:4-------
GREEN_AREA:  0.0
reward:  0
    5/10000 [..............................] - ETA: 3:13:38 - reward: 0.0000e+00---step:5-------
GREEN_AREA:  0.0
reward:  0
    6/10000 [..............................] - ETA: 3:09:11 - reward: 0.0000e+00---step:6-------
GREEN_AREA:  0.0
reward:  0
    7/10000 [..............................] - ETA: 3:05:04 - reward: 0.0000e+00---step:7-------
GREEN_AREA:  0.0
reward:  0
    8/10000 [......



---step:0-------
GREEN_AREA:  0.0
reward:  0
   12/10000 [..............................] - ETA: 3:15:19 - reward: -0.0833---step:1-------
GREEN_AREA:  0.0
reward:  0
   13/10000 [..............................] - ETA: 3:12:24 - reward: -0.0769---step:2-------
GREEN_AREA:  0.0
reward:  0
   14/10000 [..............................] - ETA: 3:10:43 - reward: -0.0714---step:3-------
GREEN_AREA:  0.0
reward:  0
   15/10000 [..............................] - ETA: 3:09:06 - reward: -0.0667---step:4-------
GREEN_AREA:  0.0
reward:  0
   16/10000 [..............................] - ETA: 3:07:38 - reward: -0.0625---step:5-------
GREEN_AREA:  0.0
reward:  0
   17/10000 [..............................] - ETA: 3:06:17 - reward: -0.0588---step:6-------
GREEN_AREA:  0.0
reward:  0
   18/10000 [..............................] - ETA: 3:05:04 - reward: -0.0556---step:7-------
GREEN_AREA:  0.0
reward:  0
   19/10000 [..............................] - ETA: 3:04:21 - reward: -0.0526---step:8-------
GREEN_A

---step:0-------
GREEN_AREA:  0.0
reward:  0
   78/10000 [..............................] - ETA: 2:46:56 - reward: -0.0897---step:1-------
GREEN_AREA:  0.0
reward:  0
   79/10000 [..............................] - ETA: 2:46:46 - reward: -0.0886---step:2-------
GREEN_AREA:  0.0
reward:  0
   80/10000 [..............................] - ETA: 2:46:37 - reward: -0.0875---step:3-------
GREEN_AREA:  0.0
reward:  0
   81/10000 [..............................] - ETA: 2:46:26 - reward: -0.0864---step:4-------
GREEN_AREA:  0.0
reward:  0
   82/10000 [..............................] - ETA: 2:46:15 - reward: -0.0854---step:5-------
GREEN_AREA:  0.0
reward:  0
   83/10000 [..............................] - ETA: 2:46:07 - reward: -0.0843---step:6-------
GREEN_AREA:  0.0
reward:  0
   84/10000 [..............................] - ETA: 2:46:00 - reward: -0.0833---step:7-------
GREEN_AREA:  0.0
reward:  0
   85/10000 [..............................] - ETA: 2:45:56 - reward: -0.0824---step:8-------
GREEN_A

  142/10000 [..............................] - ETA: 2:42:42 - reward: -0.0845---step:10-------
GREEN_AREA:  0.0
reward:  -1
---step:0-------
GREEN_AREA:  0.0
reward:  0
  144/10000 [..............................] - ETA: 2:42:45 - reward: -0.0903---step:1-------
GREEN_AREA:  0.0
reward:  0
  145/10000 [..............................] - ETA: 2:42:46 - reward: -0.0897---step:2-------
GREEN_AREA:  0.0
reward:  0
  146/10000 [..............................] - ETA: 2:42:46 - reward: -0.0890---step:3-------
GREEN_AREA:  0.0
reward:  0
  147/10000 [..............................] - ETA: 2:42:43 - reward: -0.0884---step:4-------
GREEN_AREA:  0.0
reward:  0
  148/10000 [..............................] - ETA: 2:42:42 - reward: -0.0878---step:5-------
GREEN_AREA:  0.0
reward:  0
  149/10000 [..............................] - ETA: 2:42:37 - reward: -0.0872---step:6-------
GREEN_AREA:  0.0
reward:  0
  150/10000 [..............................] - ETA: 2:42:36 - reward: -0.0867---step:7-------
GREEN

  207/10000 [..............................] - ETA: 2:41:01 - reward: -0.0870---step:9-------
GREEN_AREA:  17.139
reward:  0
  208/10000 [..............................] - ETA: 2:40:59 - reward: -0.0865---step:10-------
GREEN_AREA:  0.244
reward:  -1
---step:0-------
GREEN_AREA:  0.0
reward:  0
  210/10000 [..............................] - ETA: 2:41:00 - reward: -0.0905---step:1-------
GREEN_AREA:  0.0
reward:  0
  211/10000 [..............................] - ETA: 2:41:00 - reward: -0.0900---step:2-------
GREEN_AREA:  0.0
reward:  0
  212/10000 [..............................] - ETA: 2:41:00 - reward: -0.0896---step:3-------
GREEN_AREA:  0.0
reward:  0
  213/10000 [..............................] - ETA: 2:40:59 - reward: -0.0892---step:4-------
GREEN_AREA:  0.0
reward:  0
  214/10000 [..............................] - ETA: 2:40:58 - reward: -0.0888---step:5-------
GREEN_AREA:  0.0
reward:  0
  215/10000 [..............................] - ETA: 2:40:57 - reward: -0.0884---step:6-------


  272/10000 [..............................] - ETA: 2:39:56 - reward: -0.0846---step:6-------
GREEN_AREA:  0.0
reward:  0
  273/10000 [..............................] - ETA: 2:39:55 - reward: -0.0842---step:7-------
GREEN_AREA:  0.0
reward:  0
  274/10000 [..............................] - ETA: 2:39:53 - reward: -0.0839---step:8-------
GREEN_AREA:  0.0
reward:  0
  275/10000 [..............................] - ETA: 2:39:51 - reward: -0.0836---step:9-------
GREEN_AREA:  0.0
reward:  0
  276/10000 [..............................] - ETA: 2:39:48 - reward: -0.0833---step:10-------
GREEN_AREA:  1.904
reward:  -1
---step:0-------
GREEN_AREA:  0.0
reward:  0
  278/10000 [..............................] - ETA: 2:39:47 - reward: -0.0863---step:1-------
GREEN_AREA:  0.0
reward:  0
  279/10000 [..............................] - ETA: 2:39:44 - reward: -0.0860---step:2-------
GREEN_AREA:  0.928
reward:  0
  280/10000 [..............................] - ETA: 2:39:44 - reward: -0.0857---step:3-------
G

  337/10000 [>.............................] - ETA: 2:38:26 - reward: -0.0861---step:5-------
GREEN_AREA:  0.0
reward:  0
  338/10000 [>.............................] - ETA: 2:38:25 - reward: -0.0858---step:6-------
GREEN_AREA:  0.0
reward:  0
  339/10000 [>.............................] - ETA: 2:38:25 - reward: -0.0855---step:7-------
GREEN_AREA:  0.0
reward:  0
  340/10000 [>.............................] - ETA: 2:38:24 - reward: -0.0853---step:8-------
GREEN_AREA:  0.0
reward:  0
  341/10000 [>.............................] - ETA: 2:38:21 - reward: -0.0850---step:9-------
GREEN_AREA:  0.0
reward:  0
  342/10000 [>.............................] - ETA: 2:38:19 - reward: -0.0848---step:10-------
GREEN_AREA:  0.0
reward:  -1
---step:0-------
GREEN_AREA:  0.0
reward:  0
  344/10000 [>.............................] - ETA: 2:38:17 - reward: -0.0872---step:1-------
GREEN_AREA:  0.0
reward:  0
  345/10000 [>.............................] - ETA: 2:38:17 - reward: -0.0870---step:2-------
GREEN

  402/10000 [>.............................] - ETA: 2:37:06 - reward: -0.0871---step:4-------
GREEN_AREA:  0.342
reward:  0
  403/10000 [>.............................] - ETA: 2:37:05 - reward: -0.0868---step:5-------
GREEN_AREA:  0.659
reward:  0
  404/10000 [>.............................] - ETA: 2:37:03 - reward: -0.0866---step:6-------
GREEN_AREA:  0.879
reward:  0
  405/10000 [>.............................] - ETA: 2:37:01 - reward: -0.0864---step:7-------
GREEN_AREA:  0.977
reward:  0
  406/10000 [>.............................] - ETA: 2:36:59 - reward: -0.0862---step:8-------
GREEN_AREA:  0.977
reward:  0
  407/10000 [>.............................] - ETA: 2:36:57 - reward: -0.0860---step:9-------
GREEN_AREA:  1.074
reward:  0
  408/10000 [>.............................] - ETA: 2:36:55 - reward: -0.0858---step:10-------
GREEN_AREA:  1.074
reward:  -1
---step:0-------
GREEN_AREA:  0.0
reward:  0
  410/10000 [>.............................] - ETA: 2:36:54 - reward: -0.0878---step:

  467/10000 [>.............................] - ETA: 2:36:00 - reward: -0.0878---step:3-------
GREEN_AREA:  0.0
reward:  0
  468/10000 [>.............................] - ETA: 2:35:59 - reward: -0.0876---step:4-------
GREEN_AREA:  0.0
reward:  0
  469/10000 [>.............................] - ETA: 2:35:59 - reward: -0.0874---step:5-------
GREEN_AREA:  0.0
reward:  0
  470/10000 [>.............................] - ETA: 2:35:59 - reward: -0.0872---step:6-------
GREEN_AREA:  0.0
reward:  0
  471/10000 [>.............................] - ETA: 2:35:57 - reward: -0.0870---step:7-------
GREEN_AREA:  0.0
reward:  0
  472/10000 [>.............................] - ETA: 2:35:55 - reward: -0.0869---step:8-------
GREEN_AREA:  0.0
reward:  0
  473/10000 [>.............................] - ETA: 2:35:54 - reward: -0.0867---step:9-------
GREEN_AREA:  0.0
reward:  0
  474/10000 [>.............................] - ETA: 2:35:53 - reward: -0.0865---step:10-------
GREEN_AREA:  0.0
reward:  -1
---step:0-------
GREEN

  532/10000 [>.............................] - ETA: 2:34:55 - reward: -0.0883---step:2-------
GREEN_AREA:  0.0
reward:  0
  533/10000 [>.............................] - ETA: 2:34:54 - reward: -0.0882---step:3-------
GREEN_AREA:  0.0
reward:  0
  534/10000 [>.............................] - ETA: 2:34:53 - reward: -0.0880---step:4-------
GREEN_AREA:  16.187
reward:  0
  535/10000 [>.............................] - ETA: 2:34:52 - reward: -0.0879---step:5-------
GREEN_AREA:  0.0
reward:  0
  536/10000 [>.............................] - ETA: 2:34:51 - reward: -0.0877---step:6-------
GREEN_AREA:  0.0
reward:  0
  537/10000 [>.............................] - ETA: 2:34:50 - reward: -0.0875---step:7-------
GREEN_AREA:  0.0
reward:  0
  538/10000 [>.............................] - ETA: 2:34:48 - reward: -0.0874---step:8-------
GREEN_AREA:  0.0
reward:  0
  539/10000 [>.............................] - ETA: 2:34:46 - reward: -0.0872---step:9-------
GREEN_AREA:  0.0
reward:  0
  540/10000 [>.......

  597/10000 [>.............................] - ETA: 2:33:51 - reward: -0.0888---step:1-------
GREEN_AREA:  0.977
reward:  0
  598/10000 [>.............................] - ETA: 2:33:50 - reward: -0.0886---step:2-------
GREEN_AREA:  0.977
reward:  0
  599/10000 [>.............................] - ETA: 2:33:48 - reward: -0.0885---step:3-------
GREEN_AREA:  0.0
reward:  0
  600/10000 [>.............................] - ETA: 2:33:47 - reward: -0.0883---step:4-------
GREEN_AREA:  0.0
reward:  0
  601/10000 [>.............................] - ETA: 2:33:47 - reward: -0.0882---step:5-------
GREEN_AREA:  0.0
reward:  0
  602/10000 [>.............................] - ETA: 2:33:46 - reward: -0.0880---step:6-------
GREEN_AREA:  0.0
reward:  0
  603/10000 [>.............................] - ETA: 2:33:44 - reward: -0.0879---step:7-------
GREEN_AREA:  0.0
reward:  0
  604/10000 [>.............................] - ETA: 2:33:43 - reward: -0.0877---step:8-------
GREEN_AREA:  0.0
reward:  0
  605/10000 [>......

---step:0-------
GREEN_AREA:  0.0
reward:  0
  663/10000 [>.............................] - ETA: 2:32:46 - reward: -0.0890---step:1-------
GREEN_AREA:  0.0
reward:  0
  664/10000 [>.............................] - ETA: 2:32:45 - reward: -0.0889---step:2-------
GREEN_AREA:  0.0
reward:  0
  665/10000 [>.............................] - ETA: 2:32:44 - reward: -0.0887---step:3-------
GREEN_AREA:  0.0
reward:  0
  666/10000 [>.............................] - ETA: 2:32:44 - reward: -0.0886---step:4-------
GREEN_AREA:  0.0
reward:  0
  667/10000 [=>............................] - ETA: 2:32:42 - reward: -0.0885---step:5-------
GREEN_AREA:  0.0
reward:  0
  668/10000 [=>............................] - ETA: 2:32:41 - reward: -0.0883---step:6-------
GREEN_AREA:  0.0
reward:  0
  669/10000 [=>............................] - ETA: 2:32:41 - reward: -0.0882---step:7-------
GREEN_AREA:  0.0
reward:  0
  670/10000 [=>............................] - ETA: 2:32:41 - reward: -0.0881---step:8-------
GREEN_A

  727/10000 [=>............................] - ETA: 2:31:44 - reward: -0.0880---step:10-------
GREEN_AREA:  0.0
reward:  -1
---step:0-------
GREEN_AREA:  8.667
reward:  0
  729/10000 [=>............................] - ETA: 2:31:41 - reward: -0.0892---step:1-------
GREEN_AREA:  8.667
reward:  0
  730/10000 [=>............................] - ETA: 2:31:39 - reward: -0.0890---step:2-------
GREEN_AREA:  8.667
reward:  0
  731/10000 [=>............................] - ETA: 2:31:38 - reward: -0.0889---step:3-------
GREEN_AREA:  8.667
reward:  0
  732/10000 [=>............................] - ETA: 2:31:36 - reward: -0.0888---step:4-------
GREEN_AREA:  8.667
reward:  0
  733/10000 [=>............................] - ETA: 2:31:34 - reward: -0.0887---step:5-------
GREEN_AREA:  8.667
reward:  0
  734/10000 [=>............................] - ETA: 2:31:32 - reward: -0.0886---step:6-------
GREEN_AREA:  8.667
reward:  0
  735/10000 [=>............................] - ETA: 2:31:30 - reward: -0.0884---step:

error: Cannot load URDF file.

In [27]:
# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format("test_car-v0"), overwrite=True)

In [18]:
# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)