In [None]:
import gym, gym_futbol_v1
%matplotlib inline
from gym_futbol_v1.envs import Side
from utils import notebook_render_simple, notebook_render_mlp, notebook_render_lstm, MultiAgentWrapper, MultiAgentTrain
from utils import EvalCallback, evaluate_policy
from utils.video_utils import notebook_render_helper
from training import ppo2_mlp_policy_train, ppo2_mlp_lstm_policy_train, ppo2_mlp_multi_agent_train
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.policies import MlpPolicy, MlpLstmPolicy
import numpy as np

# Test Env

In [None]:
# Build the environment registered under the id "futbol-v1".
env = gym.make("futbol-v1")
# NOTE(review): render() runs before the first reset(); this presumably relies on the
# constructor already having set up a drawable state -- confirm against the env.
env.render()
# reset() returns the observation; reshaping to 4 columns prints it in rows of four
# values (presumably one row per tracked object -- TODO confirm the layout).
print(env.reset().reshape((-1,4)))

In [None]:
# Drive the env through the simple render helper with a fixed action
# (random=False) for the left side; the helper's return value is kept.
total_reward = notebook_render_simple(
    env,
    length=5,
    random=False,
    action=[0, 2, 0, 3],
    side=Side.left,
)

In [None]:
# Display the ball's `owner_side` attribute as it stands after the rollout above.
env.ball.owner_side

In [None]:
# Display the ball's `last_owner_side` attribute for comparison with `owner_side`.
env.ball.last_owner_side

In [None]:
# Reset the environment; being the cell's last expression, the value returned by
# reset() is shown as the cell output.
env.reset()

# PPO2 with Mlp Policy

In [None]:
# Run the project's PPO2/MLP training helper with time_step=10**4 (presumably the
# training budget in timesteps -- confirm in `training`) and logging silenced.
# The helper's two return values are unpacked as the model and its save directory.
model, save_dir = ppo2_mlp_policy_train(time_step=10**4, verbose=0)

In [None]:
# Load the checkpoint stored as "best_model" inside the save directory returned
# by the training helper.
best_model = PPO2.load(f"{save_dir}/best_model")

In [None]:
# Render the loaded checkpoint on 'futbol-v1' via the MLP render helper and keep
# the value it returns.
total_reward = notebook_render_mlp('futbol-v1', best_model)

# PPO2 with MlpLstm Policy

In [None]:
# Same flow as the MLP section, using the LSTM training helper. Note that this
# rebinds both `model` and `save_dir` from the previous section.
model, save_dir = ppo2_mlp_lstm_policy_train(time_step=10**4, verbose=0)

In [None]:
# Call learn() on the model returned by the helper with total_timesteps=10**5.
# As the last expression in the cell, learn()'s return value is displayed.
model.learn(total_timesteps=10**5)

In [None]:
# Render the LSTM model on 'futbol-v1' via the LSTM render helper and keep the
# value it returns.
total_reward = notebook_render_lstm('futbol-v1', model)

# Multi Agent with PPO2

In [None]:
# Run the multi-agent training helper with num_turn=2 and time_step=10**4; it
# returns one model per side plus a save directory (this rebinds `save_dir`).
# NOTE(review): how num_turn and time_step combine is defined in `training` --
# presumably time_step applies per turn; confirm.
model_left, model_right, save_dir = ppo2_mlp_multi_agent_train(time_step=10**4, num_turn=2, verbose=0)

In [None]:
# Wrap a fresh 'futbol-v1' env for Side.right, register model_left as the agent for
# Side.left, then render model_right on that wrapper with length=300.
env_right = MultiAgentWrapper(gym.make('futbol-v1'), Side.right)
env_right.set_agent(model_left, Side.left)
reward = notebook_render_mlp(env_right, model_right, length=300, side=Side.right)

In [None]:
# Mirror of the previous cell: wrap a fresh env for Side.left, register model_right as
# the agent for Side.right, then render model_left with length=300.
# `reward` is rebound here, replacing the value from the right-side run.
env_left = MultiAgentWrapper(gym.make('futbol-v1'), Side.left)
env_left.set_agent(model_right, Side.right)
reward = notebook_render_mlp(env_left, model_left, length=300, side=Side.left)

## Multi-agent training with the `MultiAgentTrain` class

In [None]:
# Construct the project's multi-agent trainer with the LSTM policy class and the
# label 'ppo2-lstm' as its policy_name.
multi_agent = MultiAgentTrain(policy=MlpLstmPolicy, policy_name='ppo2-lstm')

In [None]:
# Show whatever summary the trainer exposes through get_info().
multi_agent.get_info()

In [None]:
# Render from the left side. This runs before multi_agent.train() is called in this
# notebook, so it presumably shows the not-yet-trained behaviour -- confirm.
multi_agent.notebook_render_left()

In [None]:
# Train for a single turn with saving enabled.
# NOTE(review): save_interval=2 is larger than num_turn=1 -- check in
# MultiAgentTrain whether any checkpoint is written during this call.
multi_agent.train(
    num_turn=1,
    time_step=10 ** 4,
    verbose=1,
    save=True,
    save_interval=2,
)

In [None]:
# Render from the right side after the training call above.
multi_agent.notebook_render_right()

In [None]:
# Inspect the right-team agent held by the right-side env wrapper.
# Per the original author's note this is expected to be a BaseAgent, because that
# side's action is overwritten by the model inside the render function
# (TODO confirm which model the note refers to).
multi_agent.env_right.team_right_agent

In [None]:
# Inspect the left-team agent held by the same right-side env wrapper.
multi_agent.env_right.team_left_agent

In [None]:
# Persist the trainer's models explicitly via save_models().
multi_agent.save_models()

# Load Pretrained Model from ./zoo

In [None]:
!pwd

In [None]:
# Load a saved PPO2 model from the zoo directory. The path is relative, so it only
# resolves when the kernel's working directory contains `zoo/`.
lstm_model = PPO2.load('zoo/2v2/ppo2-lstm-2v2-5e3')

In [None]:
# Render the loaded model on 'futbol-v1' via the LSTM render helper and keep the
# value it returns.
total_reward = notebook_render_lstm('futbol-v1', lstm_model)

In [None]:
# Evaluate the loaded model with the project's evaluate_policy helper.
# NOTE(review): `env` is the notebook-global created in the "Test Env" section and is
# rebound to a vectorized env further down, so the result of this cell depends on
# which cells have been run -- top-to-bottom execution passes the plain gym env.
evaluate_policy(lstm_model, env)

In [None]:
# Dedicated env instance handed to the evaluation callback.
eval_env = gym.make('futbol-v1')

# Callback configuration: both the best-model path and the log path point at
# ./training/logs; eval_freq=100 with a single, non-deterministic, unrendered
# evaluation episode.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path='./training/logs',
    log_path='./training/logs',
    eval_freq=100,
    n_eval_episodes=1,
    deterministic=False,
    render=False,
)

In [None]:
# Number of environment copies held by the vectorized wrapper.
num_envs = 8
# DummyVecEnv takes a list of factories and calls each one to build a slot's env.
# Every factory creates its own 'futbol-v1' instance: reusing a single instance for
# all slots would make every slot step and reset the same game, interleaving their
# trajectories instead of collecting num_envs independent rollouts.
env = DummyVecEnv([lambda: gym.make('futbol-v1') for _ in range(num_envs)])

In [None]:
# Build a fresh PPO2 model with the MLP policy on the vectorized env from the previous
# cell; this rebinds `model` from the earlier sections.
model = PPO2(MlpPolicy, env, verbose=1)

In [None]:
# Train with total_timesteps=10**4, passing the evaluation callback configured above.
# As the last expression in the cell, learn()'s return value is displayed.
model.learn(total_timesteps=10**4, callback=eval_callback)