# DRLND: Collaboration and Competition - let the trained models play

---

The models have been trained. Now let's watch them play.

### 1. Start the Environment

In [1]:
from unityagents import UnityEnvironment
import numpy as np

# Launch the Unity Tennis build (path is relative to the working directory).
env = UnityEnvironment(file_name="Tennis.app")

# Use the first brain the environment exposes as the controlling brain.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Action dimensionality as reported by the brain.
action_size = brain.vector_action_space_size

# Reset once so the initial observations are available for inspection.
env_info = env.reset(train_mode=True)[brain_name]

# One observation row per agent; the column count is the per-agent state size.
states = env_info.vector_observations
num_agents = len(env_info.agents)
state_size = states.shape[1]


INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


In [2]:
import model
import maddpg_agent
import parameters
import torch

In [3]:
# Network / optimiser settings used to rebuild the agents.
# state_size and action_size mirror what the environment reported above
# (3 stacked observations of 8 values -> 24 inputs; 2 continuous actions).
# NOTE(review): the remaining values presumably have to match the ones used
# when the checkpoints were trained -- confirm against the training notebook.
state_size = 24
action_size = 2
TAU = 1e-2
LR_ACTOR = 1e-3
LR_CRITIC = 1e-4
fc1_units = 300
fc2_units = 400

# One independently seeded agent per racket.
agent_1 = maddpg_agent.Agent(state_size, action_size, TAU, LR_ACTOR, LR_CRITIC, fc1_units, fc2_units, random_seed=123)
agent_2 = maddpg_agent.Agent(state_size, action_size, TAU, LR_ACTOR, LR_CRITIC, fc1_units, fc2_units, random_seed=345)

# Restore the trained actor weights. map_location='cpu' lets checkpoints that
# were saved on a GPU load on a CPU-only machine; load_state_dict then copies
# the tensors into the actor's existing parameters.
agent_1.actor_local.load_state_dict(torch.load('checkpoint_actor1_4.pth', map_location='cpu'))
agent_2.actor_local.load_state_dict(torch.load('checkpoint_actor2_4.pth', map_location='cpu'))

# Index i in this list controls agent i in the environment.
agents = [agent_1, agent_2]

In [4]:
# Start a fresh episode every time this cell runs, so it does not depend on
# observations left over from an earlier cell or a previous run of this one.
# train_mode=False requests Unity's inference (watchable) configuration.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations

# Running (undiscounted) return for each agent.
scores = np.zeros(num_agents)

while True:
    # Each agent picks its action from its own observation row.
    actions = np.array([agent.act(state) for agent, state in zip(agents, states)])

    # Advance the simulation one step and read back the result.
    env_info = env.step(actions)[brain_name]
    next_states = env_info.vector_observations
    rewards = env_info.rewards
    dones = env_info.local_done

    states = next_states
    scores += rewards

    # '\r' rewrites the same output line so the score updates in place.
    print('\rScores: {:.2f}\t{:.2f}'
              .format(scores[0], scores[1]), end="")

    # The episode is over as soon as any agent reports done.
    if np.any(dones):
        break

print("\nScores: {}".format(scores))

Scores: 2.60	2.60
Scores: [2.60000004 2.60000004]


In [None]:
# Close the Unity environment that was opened in the first code cell.
env.close()