# Continuous Control

---

In this notebook, you will learn how to use the Unity ML-Agents environment for the second project of the [Deep Reinforcement Learning Nanodegree](https://www.udacity.com/course/deep-reinforcement-learning-nanodegree--nd893) program.


Now it's your turn to train your own agent to solve the environment!  When training the agent, set `train_mode=True`, so that the line for resetting the environment looks like the following:
```python
env_info = env.reset(train_mode=True)[brain_name]
```

In [1]:
# Declare agent
import sys

# Add the subfolder to sys.path
sys.path.append('./reacher')

from unityagents import UnityEnvironment
import numpy as np
import importlib
import model
import agent
importlib.reload(agent)
importlib.reload(model)

# Launch the Unity Reacher environment, inspect its action/state sizes, and
# build the DDPG agent. The globals defined here (env, brain_name, num_agents,
# agent, output_folder_path) are read by the training cell further down.
#
# Alternative build of the environment (Windows executable), kept for reference:
# env = UnityEnvironment(file_name="Reacher_Windows_x86_64/Reacher.exe")
env = UnityEnvironment(file_name="reacher/Reacher_Multi.app")
# get the default brain (the first one listed by the environment)
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment in training mode and keep the info for this brain
env_info = env.reset(train_mode=True)[brain_name]

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space: the second axis of the observation array is used
# as the per-agent state length, the first axis is reported as the agent count
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))


# number of agents
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)
# declare the agent: sizes come from the environment inspected above, the
# remaining keyword arguments are fixed hyperparameters
# NOTE(review): the exact meaning of update_every_timestamp and
# update_time_each_stamp is defined in agent.DDPGConfig, which is not visible
# here — confirm against that class.
# NOTE(review): replay_buffer_size is passed as the float literal 1e6 —
# confirm DDPGConfig/DDPGAgent accept a non-integer size.
config = agent.DDPGConfig(
    num_state = state_size,
    num_action = action_size,
    num_agent = num_agents,
    actor_learning_rate=1e-4,
    critic_learning_rate=1e-4,
    batch_size=128,
    update_every_timestamp=20,
    update_time_each_stamp=10,
    discount_factor=0.99,
    replay_buffer_size=1e6,
    weight_decay=0)
# NOTE: this rebinds the name `agent` from the imported module to the agent
# instance; after this line the module is no longer reachable as `agent`.
agent = agent.DDPGAgent(config)

# folder passed to agent.save_model by the training cell
output_folder_path = "reacher/weights/"


Mono path[0] = '/Users/nuocheng/Desktop/Reinforcement_Learning/Reinforcement-Learning-Projects/reacher/Reacher_Multi.app/Contents/Resources/Data/Managed'
Mono config path = '/Users/nuocheng/Desktop/Reinforcement_Learning/Reinforcement-Learning-Projects/reacher/Reacher_Multi.app/Contents/MonoBleedingEdge/etc'


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_size -> 5.0
		goal_speed -> 1.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Size of each action: 4
There are 20 agents. Each observes a state with length: 33
Number of agents: 20


In [None]:

# Function to train the network
from collections import deque
import torch
import matplotlib.pyplot as plt

def train(agent, env, num_agents, folder_path, n_episodes=2000, max_t=1000,
          brain_name=None, target_score=32.0, window=100):
    """Train ``agent`` in a Unity environment and return its score history.

    Each episode resets the environment in training mode, lets the agent act
    for at most ``max_t`` steps (stopping early as soon as any agent reports
    ``done``), and records the mean of the per-agent episode returns.

    Training stops early once the mean score over a *full* window of
    ``window`` consecutive episodes reaches ``target_score``.

    Args:
        agent: Object providing ``reset()``, ``act(state)``,
            ``step(state, action, reward, next_state, done)`` and
            ``save_model(folder_path, suffix)``.
        env: Unity environment providing ``reset(train_mode=...)``,
            ``step(action)`` and ``brain_names``.
        num_agents: Number of parallel agents to read from the environment.
        folder_path: Folder handed to ``agent.save_model`` for checkpoints.
        n_episodes: Maximum number of episodes to run.
        max_t: Maximum number of steps per episode.
        brain_name: Key used to index the environment's reset/step results.
            Defaults to the first entry of ``env.brain_names``.
        target_score: Windowed average score at which training stops.
        window: Number of most recent episodes averaged for the stop check.

    Returns:
        A tuple ``(all_rewards, all_avg_rewards)``: the per-episode mean
        score and the running windowed average after each episode.
    """
    # Resolve the brain from the environment rather than relying on a
    # notebook-level global being defined.
    if brain_name is None:
        brain_name = env.brain_names[0]

    all_rewards = []
    all_avg_rewards = []
    rewards_window = deque(maxlen=window)  # most recent `window` scores
    for i_episode in range(n_episodes):
        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[:num_agents]
        total_reward = np.zeros(num_agents)
        agent.reset()
        for _ in range(max_t):
            # get the action from the agent based on the current states
            action = agent.act(state)
            # advance the environment with that action
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[:num_agents]
            reward = env_info.rewards[:num_agents]
            done = env_info.local_done[:num_agents]
            # let the agent record the transition / learn
            agent.step(state, action, reward, next_state, done)
            # prepare the next iteration
            state = next_state
            total_reward += reward
            # the episode has reached its end, so start a new one
            if np.any(done):
                break

        episode_score = np.mean(total_reward)
        all_rewards.append(episode_score)
        rewards_window.append(episode_score)
        avg_score = np.mean(rewards_window)
        all_avg_rewards.append(avg_score)
        # One progress line per episode (previously duplicated on every
        # 100th episode).
        print('Episode {}\tAverage Score: {:.2f}\t Total Score: {:.2f}'.format(i_episode, avg_score, episode_score))

        # Only declare the environment solved once the window is full;
        # otherwise a few lucky early episodes would end training and the
        # reported episode count would be negative.
        if len(rewards_window) == window and avg_score >= target_score:
            # i_episode is 0-based, so i_episode + 1 episodes have been run.
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode + 1 - window, avg_score))
            break
        # periodic checkpoint
        if i_episode % 200 == 0:
            agent.save_model(folder_path, '_ep_' + str(i_episode))

    return all_rewards, all_avg_rewards

# Run training. The Unity environment is closed in `finally` so that an
# interrupted or failed run (e.g. KeyboardInterrupt) does not leave the
# environment process open.
try:
    scores, avg_scores = train(agent, env, num_agents, output_folder_path, 2000, 1000)
finally:
    env.close()

# save the trained weight (only reached when training finished normally)
agent.save_model(output_folder_path)

# plot the per-episode score together with its running average
fig = plt.figure()
ax = fig.add_subplot(111)
episodes = np.arange(len(scores))
ax.plot(episodes, scores, label='Episode score')
ax.plot(episodes, avg_scores, label='Running average')
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.legend()
plt.show()

Episode 0	Average Score: 0.27	 Total Score: 0.27
Episode 0	Average Score: 0.27	 Total Score: 0.27


KeyboardInterrupt: 


### 3. Load and Test a Model

The code block below loads an existing model and automatically tests it in the opened environment.

In [None]:
# Test the model
# Declare agent
import sys

# Add the subfolder to sys.path
sys.path.append('./reacher')

from unityagents import UnityEnvironment
import numpy as np
import importlib
import model
import agent
importlib.reload(agent)
importlib.reload(model)

# Load saved weights and roll the agent out in the Reacher environment.
#
# Alternative build of the environment (Windows executable), kept for reference:
# env = UnityEnvironment(file_name="Reacher_Windows_x86_64/Reacher.exe")
env = UnityEnvironment(file_name="reacher/Reacher_Multi.app")
# Everything after launching the environment runs inside try/finally so the
# Unity process is closed even if loading fails or the rollout is interrupted.
try:
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # declare the agent with the same configuration used for training and
    # restore its weights from the output folder
    config = agent.DDPGConfig(
        num_state=state_size,
        num_action=action_size,
        num_agent=num_agents,
        actor_learning_rate=1e-4,
        critic_learning_rate=1e-4,
        batch_size=128,
        update_every_timestamp=20,
        update_time_each_stamp=10,
        discount_factor=0.99,
        replay_buffer_size=1e6,
        weight_decay=0)
    output_folder_path = "reacher/weights/"
    # NOTE: rebinds `agent` from the imported module to the agent instance.
    agent = agent.DDPGAgent(config)
    agent.load_model(output_folder_path)

    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    states = env_info.vector_observations               # get the current state
    # Run at most 3000 steps, printing the 1-based step number as progress.
    for step in range(1, 3001):
        print(step)
        actions = agent.act(states)
        env_info = env.step(actions)[brain_name]
        states = env_info.vector_observations
        # stop as soon as any agent reports the end of its episode
        if np.any(env_info.local_done):
            break
finally:
    env.close()



Mono path[0] = '/Users/nuocheng/Desktop/Reinforcement_Learning/Reinforcement-Learning-Projects/reacher/Reacher_Multi.app/Contents/Resources/Data/Managed'
Mono config path = '/Users/nuocheng/Desktop/Reinforcement_Learning/Reinforcement-Learning-Projects/reacher/Reacher_Multi.app/Contents/MonoBleedingEdge/etc'


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_size -> 5.0
		goal_speed -> 1.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Size of each action: 4
There are 20 agents. Each observes a state with length: 33
Number of agents: 20
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
2

KeyboardInterrupt: 