In [1]:
from unityagents import UnityEnvironment
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt

from ddpg_agent import Agent

In [2]:
# Launch the Reacher build with a fixed seed and select its first brain.
env = UnityEnvironment(file_name='Reacher_Linux/Reacher.x86_64', seed=0)
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset once in training mode and collect the values later cells rely on.
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size
state_size = states.shape[1]

# Report the problem dimensions.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_size -> 5.0
		goal_speed -> 1.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of agents: 20
Size of each action: 4
There are 20 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726624e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


In [3]:
# Single agent instance, sized from the environment's observation/action dimensions.
agent = Agent(state_size, action_size, random_seed=0)

Initialising ReplayBuffer


In [4]:
def ddpg(num_episodes=300, max_time=1000):
    """Train the notebook's DDPG agent on the Unity environment.

    Uses the notebook-level names ``env``, ``brain_name``, ``num_agents`` and
    ``agent`` defined in earlier cells. One ``agent`` instance picks the
    action for every simulated arm and receives every arm's transition.

    Training stops early once the mean of the most recent (up to 100)
    episode scores reaches 30.0; at that point the actor and critic weights
    are written to ``checkpoint_actor.pth`` and ``checkpoint_critic.pth``.

    Args:
        num_episodes: Maximum number of episodes to run.
        max_time: Maximum number of environment steps per episode.

    Returns:
        list: One score per completed episode, where a score is the mean over
        all agents of the rewards accumulated during that episode.
    """
    scores_window = deque(maxlen=100)  # rolling window of recent episode scores
    scores_all = []

    for episode in range(1, num_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations

        scores = np.zeros(num_agents)
        for t in range(max_time):
            actions = []
            for j in range(num_agents):
                # NOTE(review): reset() runs before every act() call, not once
                # per episode. Presumably it resets exploration noise -- confirm
                # in ddpg_agent. Left as-is because moving it would change the
                # training dynamics behind the recorded results.
                agent.reset()
                actions.append(agent.act(states[j]))
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done

            # Hand each arm's transition to the shared agent.
            for i in range(num_agents):
                agent.step(t, states[i], actions[i], rewards[i], next_states[i], dones[i])

            states = next_states
            scores += rewards
            agent.learnParams()
            # Progress line every 20th step. The original `if t % 20:` was
            # inverted and printed on every step *except* multiples of 20.
            if t % 20 == 0:
                print('\rTimestep {}\tScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'
                      .format(t, np.mean(scores), np.min(scores), np.max(scores)), end="")
            if np.any(dones):
                break

        score = np.mean(scores)
        scores_window.append(score)
        scores_all.append(score)
        window_mean = np.mean(scores_window)

        print('\rEpisode {}\tScore: {:.2f}\tAverage Score: {:.2f}'.format(episode, score, window_mean))
        if episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(episode, window_mean))
        if window_mean >= 30.0:
            # `episode - 100` is the episode count before the 100-episode
            # window began; it is negative if the threshold is reached before
            # 100 episodes have been played.
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(episode - 100,
                                                                                         window_mean))
            # Save the weights of the trained instance. The original read
            # these attributes from the `Agent` class instead of `agent`.
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            break

    return scores_all

In [None]:
# Run training; `scores` holds one mean score per episode.
scores = ddpg()

# Learning curve: episode index on x, episode score on y.
fig = plt.figure()
fig.gca().plot(np.arange(len(scores)), scores)
fig.gca().set(ylabel='score', xlabel='Episodes')
plt.show()

Episode 1	Score: 0.07	Average Score: 0.07.48
Episode 2	Score: 0.06	Average Score: 0.07.24
Episode 3	Score: 0.78	Average Score: 0.31.12
Episode 4	Score: 0.81	Average Score: 0.43.06
Episode 5	Score: 0.50	Average Score: 0.44.66
Episode 6	Score: 0.82	Average Score: 0.51.69
Episode 7	Score: 0.82	Average Score: 0.55.37
Episode 8	Score: 0.79	Average Score: 0.58.55
Episode 9	Score: 0.96	Average Score: 0.62.59
Episode 10	Score: 1.12	Average Score: 0.6724
Episode 11	Score: 1.09	Average Score: 0.7183
Episode 12	Score: 1.59	Average Score: 0.7885
Episode 13	Score: 1.38	Average Score: 0.8310
Episode 14	Score: 1.60	Average Score: 0.8966
Episode 15	Score: 1.57	Average Score: 0.9390
Episode 16	Score: 1.58	Average Score: 0.9736
Episode 17	Score: 1.50	Average Score: 1.0017
Episode 18	Score: 1.76	Average Score: 1.0515
Episode 19	Score: 1.78	Average Score: 1.0866
Episode 20	Score: 1.98	Average Score: 1.1322
Episode 21	Score: 2.43	Average Score: 1.1973
Episode 22	Score: 2.07	Average Score: 1.2357
Episode 23

Episode 175	Score: 28.56	Average Score: 19.1217
Episode 176	Score: 27.37	Average Score: 19.2716
Episode 177	Score: 26.73	Average Score: 19.4064
Episode 178	Score: 27.55	Average Score: 19.5507
Episode 179	Score: 28.60	Average Score: 19.7189
Episode 180	Score: 27.85	Average Score: 19.8865
Episode 181	Score: 28.94	Average Score: 20.0520
Episode 182	Score: 28.75	Average Score: 20.2202
Episode 183	Score: 29.16	Average Score: 20.4034
Episode 184	Score: 31.30	Average Score: 20.5866
Episode 185	Score: 29.25	Average Score: 20.7733
Episode 186	Score: 30.66	Average Score: 20.9697
Episode 187	Score: 28.24	Average Score: 21.1260
Episode 188	Score: 29.55	Average Score: 21.2867
Episode 189	Score: 29.66	Average Score: 21.4589
Episode 190	Score: 31.15	Average Score: 21.6377
Episode 191	Score: 30.53	Average Score: 21.8137
Episode 192	Score: 30.82	Average Score: 21.9927
Episode 193	Score: 30.18	Average Score: 22.1435
Episode 194	Score: 29.11	Average Score: 22.2978
Episode 195	Score: 29.21	Average Score: 