# Continuous Control

In [None]:
from unityagents import UnityEnvironment
import numpy as np
from ddpg_agent import Agent
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Create the Unity environment from the Reacher binary at the hard-coded path below.
# NOTE(review): the path points at /data/Reacher_Linux_NoVis; change it if the
# binary lives elsewhere on your machine.
env = UnityEnvironment(file_name='/data/Reacher_Linux_NoVis/Reacher.x86_64')

In [None]:
# Get the default brain: take the first name the environment lists and look up
# the matching brain object. `brain_name` is later used to index the result of
# env.reset(); `brain` supplies the action size.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
# Reset the environment in training mode and keep the default brain's info
env_info = env.reset(train_mode=True)[brain_name]

# Collect the quantities that describe the environment
num_agents = len(env_info.agents)             # number of agents
action_size = brain.vector_action_space_size  # size of each action
states = env_info.vector_observations         # one row of observations per agent
state_size = states.shape[1]                  # length of a single agent's state

# Report them
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print(
    'There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size
    )
)
print('The state for the first agent looks like:', states[0])

In [None]:
# Instantiate the agent (Agent comes from the ddpg_agent module) with the state
# and action sizes read from the environment above, the environment handle
# itself, and a fixed random seed of 1.
agent = Agent(state_size=state_size, action_size=action_size, env=env, random_seed=1)

In [None]:
# Train the agent. train() returns two values, unpacked here as `scores` and
# `average_scores`; both are plotted against episode number in the next cell.
scores, average_scores = agent.train()

In [None]:
# Plot both score curves against the 1-based episode number on a single axes
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(scores) + 1), scores, label="Episode Scores")
ax.plot(
    np.arange(1, len(average_scores) + 1),
    average_scores,
    label="Average Scores (over 100 episodes)",
)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.legend()
plt.show()

In [None]:
# Show the actor's architecture by printing the agent's `actor_local` attribute
print(agent.actor_local)

In [None]:
# Show the critic's architecture by printing the agent's `critic_local` attribute
print(agent.critic_local)

In [None]:
# Close the Unity environment now that training and reporting are finished
env.close()