# Project #3 : Collaboration and Competition

## Part 2: Watch the Trained Agent

---

Use this notebook to watch two trained agents play the Unity Tennis environment.

### 1. Import libraries

In [1]:
import torch
import numpy as np
from collections import deque
#import random
import time

### 2. Load the Environment and Agent classes 

In [2]:
from unityagents import UnityEnvironment
from maddpg_agent import MADDPG_Agent


   *****************
   *** Using CPU ***
   *****************



### 3. Initialize the environment

Next, we will start the environment!  **_Before running the code cell below_**, change the `file_name` parameter to match the location of the Unity environment that you downloaded.

- **Mac**: `"path/to/Tennis.app"`
- **Windows** (x86): `"path/to/Tennis_Windows_x86/Tennis.exe"`
- **Windows** (x86_64): `"path/to/Tennis_Windows_x86_64/Tennis.exe"`
- **Linux** (x86): `"path/to/Tennis_Linux/Tennis.x86"`
- **Linux** (x86_64): `"path/to/Tennis_Linux/Tennis.x86_64"`
- **Linux** (x86, headless): `"path/to/Tennis_Linux_NoVis/Tennis.x86"`
- **Linux** (x86_64, headless): `"path/to/Tennis_Linux_NoVis/Tennis.x86_64"`

For instance, if you are using a Mac, then you downloaded `Tennis.app`.  If this file is in the same folder as the notebook, then the line below should appear as follows:
```
env = UnityEnvironment(file_name="Tennis.app")
```

In [3]:
# Unity Tennis build to launch. "Tennis.app" is the macOS build sitting next to
# this notebook; in the Udacity Workspace use '/data/Tennis_Linux/Tennis.x86_64'.
tennis_build = "Tennis.app"
env = UnityEnvironment(file_name=tennis_build)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


In [4]:
# Look up the first brain exposed by the environment and its action dimension.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
action_size = brain.vector_action_space_size

# Reset the environment (train_mode=False) and read the initial observations.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations      # current state, one row per agent
num_agents = len(env_info.agents)
state_size = states.shape[1]               # length of a single agent's observation row

### 4. Initialize the agent

**Make sure the parameters below are set to the same values used for training!**

In [5]:
# Agent and model hyperparameters; keep them in sync with the values used for training.

class Config():
    """Hyperparameters handed to each MADDPG_Agent.

    The values are plain attributes set in ``__init__``. ``ADD_NOISE`` is
    assigned exactly once and is False, because this notebook only replays a
    trained policy; the remaining values are kept as they were for training.
    """

    def __init__(self):
        # Playback only: no Ornstein-Uhlenbeck exploration noise on the actions.
        # (Previously a later assignment in this method reset this flag to True.)
        self.ADD_NOISE = False

        # Network layer sizes.
        self.FC1_UNITS_ACTOR = 256   # number of nodes in Actor's first fully connected layer
        self.FC2_UNITS_ACTOR = 128   # number of nodes in Actor's second fully connected layer
        self.FCS1_UNITS_CRITIC = 256 # number of nodes in Critic's first fully connected layer
        self.FC2_UNITS_CRITIC = 128  # number of nodes in Critic's second fully connected layer

        self.SEED = 42               # seed for random number generator
        self.PRINT_EVERY = 100       # number of episodes after which the training procedure prints a persistent update

        self.TARGET_SCORE = 0.5      # target score; training is complete when target score is met
        self.WINDOW_SIZE = 100       # number of episodes over which the average episode score is calculated

        self.BUFFER_SIZE = int(1e6)  # replay buffer size
        self.BATCH_SIZE = 128        # minibatch size

        self.LR_ACTOR = 1e-4         # learning rate of the actor
        self.LR_CRITIC = 5e-4        # learning rate of the critic

        self.LEARN_EVERY = 1         # learning timestep interval
        self.LEARN_NUM = 1           # number of learning passes

        self.GAMMA = 0.99            # reward discount factor
        self.TAU = 7e-2              # for soft update of target parameters

        # Ornstein-Uhlenbeck noise parameters, retained so the config carries
        # the same fields as in training even though ADD_NOISE is off here.
        self.OU_SIGMA = 0.2          # Ornstein-Uhlenbeck noise parameter volatility
        self.OU_THETA = 0.12         # Ornstein-Uhlenbeck noise parameter speed of mean reversion
        self.EPS_INITIAL = 5.5       # initial value of epsilon in Ornstein-Uhlenbeck noise process
        self.EPS_TERMINAL = 0        # terminal value of epsilon in Ornstein-Uhlenbeck noise process
        self.EPS_DECAY = 250         # number of episodes over which epsilon decays in OU noise process


config = Config()

In [6]:
# Build the two players from the shared configuration.
agent_0 = MADDPG_Agent(state_size, action_size, 1, config)
agent_1 = MADDPG_Agent(state_size, action_size, 1, config)

# Restore each actor from the weights generated during training. The
# map_location callable hands back the storage as deserialized, the usual idiom
# for loading a checkpoint without needing the device it was saved on.
# Only the actor checkpoints are loaded; the critic checkpoints are not.
for agent, actor_file in (
    (agent_0, "checkpoint_actor_0.pth"),
    (agent_1, "checkpoint_actor_1.pth"),
):
    actor_weights = torch.load(actor_file, map_location=lambda storage, loc: storage)
    agent.actor_local.load_state_dict(actor_weights)

### 5. Watch the agent interact with its environment

In [7]:
# Watch a single episode: play until either agent's score reaches the target
# score or the environment reports the episode as finished.

# Start from a fresh episode so this cell can be re-run on its own instead of
# continuing from whatever `states` a previous run left behind.
env_info = env.reset(train_mode=False)[brain_name]

# Flatten all agents' observations into a single row, the same layout the loop
# uses for every subsequent step, so the first action sees a consistent input.
states = np.reshape(env_info.vector_observations, (1, -1))

steps = 0                                                # number of time steps played so far
scores = np.zeros(num_agents)                            # running score (for each agent)

while np.max(scores) < config.TARGET_SCORE:
    steps += 1
    a0 = agent_0.play(states)                            # agent 0 chooses an action
    a1 = agent_1.play(states)                            # agent 1 chooses an action
    actions = np.concatenate((a0, a1), axis=0).flatten() # combine agent actions
    env_info = env.step(actions)[brain_name]             # send both agents' actions together to the environment
    next_states = np.reshape(env_info.vector_observations, (1, -1))  # combine the agents' next states into one row
    rewards = env_info.rewards                           # get reward (for each agent)
    dones = env_info.local_done                          # see if episode finished
    scores += rewards                                    # update the score
    print('\rStep {:3}\tScore : {:.2f}'.format(steps, np.max(scores)), end="")
    states = next_states                                 # roll over states to next time step
    if np.any(dones):                                    # exit loop if episode finished
        break

Step 172	Score : 0.50

In [8]:
# Close the Unity environment once the episode has been watched.
env.close()