In [1]:
from unityagents import UnityEnvironment
import numpy as np
import torch
from collections import deque

import matplotlib.pyplot as plt
%matplotlib inline

from ddpg_agent import Agent

In [2]:
env = UnityEnvironment(file_name="Tennis_Linux/Tennis.x86_64", worker_id=1, seed=2)
# get the default brain
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


In [3]:
# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of agents 
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# size of each action
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# examine the state space 
states = env_info.vector_observations
state_size = states.shape[1]
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

Number of agents: 2
Size of each action: 2
There are 2 agents. Each observes a state with length: 24
The state for the first agent looks like: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -6.29742813 -1.5
 -0.          0.          7.17024279  6.         -0.          0.        ]


In [4]:
agent  = Agent(state_size, action_size, random_seed = 2)
agent_kwargs = {"state_size": state_size, "action_size": action_size, "random_seed": 2,}
agents = [Agent(**agent_kwargs) for _ in range(num_agents)]
len_agents = len(str(num_agents))
for agent_id,agent in enumerate(agents):
    chkpt_path_actor = "actor_" + str(agent_id).zfill(len_agents) + "_solved.pth";
    chkpt_path_critic = "critic_" + str(agent_id).zfill(len_agents) + "_solved.pth"
    agent.actor_local.load_state_dict(torch.load(chkpt_path_actor))
    agent.critic_local.load_state_dict(torch.load(chkpt_path_critic))

scores_list = []
n_episodes = 2
t_max = 1000
for i_episode in range(1,n_episodes+1):
    env_info = env.reset(train_mode=False)[brain_name]
    states = env_info.vector_observations
    agent.reset()
    scores = np.zeros(num_agents)
    for _ in range(t_max):
        actions = agent.act(states)
        env_info = env.step(actions)[brain_name]
        next_states = env_info.vector_observations         # get next state (for each agent)
        rewards = env_info.rewards                         # get reward (for each agent)
        dones = env_info.local_done                        # see if episode finished

        states  = next_states
        scores += rewards                                  # update the score (for each agent)
        if np.any(dones):                                  # exit loop if episode finished
            break
    print('Episode {}: {}'.format(i_episode, scores))
    scores_list.append(np.max(scores))
print('Mean score is: ',np.mean(np.array(scores_list)))

env.close()

Episode 1: [2.20000003 2.19000003]
Episode 2: [2.39000004 2.50000004]
Mean score is:  2.350000035017729
