In [1]:
import torch
import torch.multiprocessing as mp
from extr_env import *
from extr_game import ExtrGame
import torch.nn.functional as F
from a3c_net_2_layer import A3CNet
from a3c_net_1_layer import Net
from statistics import mean, stdev, median, mode
import seaborn as sns
import pandas as pd
from a3c_funcs import *
import pickle
import neat
import random

!! N.B. Neither A3C nor NEAT is trained in this file.
- This is because the multiprocessing used during training (running across several workers) does not work well within Jupyter Notebooks.
- If you want to re-run the training, please run the following commands from the command line / Anaconda prompt:
        - python a3c_main.py
        - python neat_main.py

# Creating the Extreme Environment:

In [2]:
def create_extr_env(damage, all_moves):
    """Build an Extr_env wrapping a freshly constructed ExtrGame.

    The game is always created for the "Ash_starters" trainer and reads its
    Pokemon, trainer, move and type data from fixed CSV paths.

    Args:
        damage: passed to ExtrGame as its CPU-damage-enabled argument.
        all_moves: passed to ExtrGame as its CPU-all-moves argument.

    Returns:
        The Extr_env instance built around the new game.
    """
    return Extr_env(
        ExtrGame(
            "Ash_starters",
            'ExtrEnvData/selected_poke_data.csv',   # Pokemon data
            'ExtrEnvData/selected_trainers.csv',    # trainer data
            'ExtrEnvData/selected_moves.csv',       # move data
            'type_advs.csv',                        # type data
            damage,
            all_moves,
        )
    )

In [3]:
# Creating the environment shared by the evaluation cells below.
# Both create_extr_env arguments (damage, all_moves) are switched on.
extr_env = create_extr_env(True, True)

# Functions to Run One Episode in Environment:

In [22]:
def run_one_episode_random(env, verbose):
    """Play one full episode, picking a uniformly random move every turn.

    Args:
        env: environment object exposing reset(), move_turn(action, verbose)
            and game.remaining_trainer_team[0].moves.
        verbose: forwarded unchanged to env.move_turn on every turn.

    Returns:
        None. The episode's effects are left on ``env``.

    Raises:
        ValueError: if the active move pool is empty.
    """
    env.reset()
    done = False
    while not done:
        move_pool = env.game.remaining_trainer_team[0].moves
        # Draw the index from the pool's actual size instead of a hard-coded
        # 0..5 range, so a pool with fewer than six entries cannot raise
        # IndexError. For a six-entry pool the draw is the same as before.
        action = move_pool[random.randrange(len(move_pool))]
        # Only the termination flag is needed; a random policy ignores the
        # observation and the reward.
        _, _, done = env.move_turn(action, verbose)

In [4]:
def run_one_episode_a3c(env, model, verbose):
    """Play one full episode with moves selected by an A3C model.

    Args:
        env: environment object exposing reset(), move_turn(action, verbose)
            and game.remaining_trainer_team[0].moves.
        model: object whose choose_action(...) returns an index into the
            active move pool.
        verbose: forwarded unchanged to env.move_turn on every turn.

    Returns:
        None. The episode's effects are left on ``env``.
    """
    state = env.reset()
    while True:
        # Add a leading axis to the observation before converting it for the
        # model.
        choice = model.choose_action(conv_np(state[None, :]))
        active_moves = env.game.remaining_trainer_team[0].moves
        state, _reward, finished = env.move_turn(active_moves[choice], verbose)
        if finished:
            break

In [17]:
def run_one_episode_neat(env, model, verbose):
    """Play one full episode with moves selected by a NEAT network.

    Each turn the network is activated on the current observation and the
    move whose output is largest is played.

    Args:
        env: environment object exposing reset(), move_turn(action, verbose)
            and game.remaining_trainer_team[0].moves.
        model: object whose activate(observation) returns one score per move.
        verbose: forwarded unchanged to env.move_turn on every turn.

    Returns:
        None. The episode's effects are left on ``env``.
    """
    state = env.reset()
    while True:
        scores = model.activate(state)
        best = np.argmax(scores)
        available = env.game.remaining_trainer_team[0].moves
        state, _reward, finished = env.move_turn(available[best], verbose)
        if finished:
            break

# Defining Models:

## Trained A3C Model:

In [5]:
# Building a freshly constructed A3CNet (imported from a3c_net_2_layer) sized by
# extr_obs_space (input) and extr_actions_possible (output).
# NOTE(review): neither size constant is defined in this notebook; presumably
# they come from one of the star imports at the top — confirm.
a3c_trained = A3CNet(extr_obs_space, extr_actions_possible)

In [6]:
# Loading the best saved model's weights from disk.
# map_location='cpu' places every tensor on the CPU while loading, so the
# checkpoint also opens on machines that lack the device it was saved from.
model_state_dict = torch.load('Models/a3c_extreme_best.pt', map_location='cpu')

In [7]:
# Loading the saved weights onto the freshly built network. The call's return
# value (shown as the cell output) reports whether all keys matched.
a3c_trained.load_state_dict(model_state_dict)

<All keys matched successfully>

In [31]:
# Un-comment this cell to see one episode run with a trained A3C model playing the game:
#run_one_episode_a3c(extr_env, a3c_trained, True)

## Trained NEAT Model:

In [13]:
# Building the NEAT configuration: the four default NEAT component classes plus
# the text file that specifies the hyperparameters.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'neat_config_extr.txt')

In [14]:
# Loading the genomes saved in the best found model.
# The file is opened in a `with` block so the handle is closed as soon as
# unpickling finishes instead of being left open.
# NOTE: pickle.load can execute arbitrary code; only load model files you trust.
with open('Models/neat_extreme_best.pkl', 'rb') as genome_file:
    genomes = pickle.load(genome_file)

In [15]:
# Using the saved genomes and config to build the feed-forward network that
# plays the game.
# NOTE(review): despite the plural name, `genomes` is passed as one argument
# here — presumably the pickle holds a single best genome; confirm.
neat_model = neat.nn.FeedForwardNetwork.create(genomes, config)

In [20]:
# Un-comment this cell to see one episode run with a trained NEAT model playing the game:
#run_one_episode_neat(extr_env, neat_model, True)

# Comparing 1000 Runs:

## Random Policy Model:

In [15]:
# Per-episode metric histories for the random policy, one list per metric.
reward_rand, super_eff_rand, se_taken_rand, t_beaten_rand, switches_rand = (
    [], [], [], [], []
)

for i in range(1000):
    run_one_episode_random(extr_env, False)
    reward, s_eff, se_taken, t_beaten, switch, _ = extr_env.game.get_metrics()
    # Append each value returned by get_metrics() to its matching history;
    # the sixth value is not recorded.
    for history, value in zip(
        (reward_rand, super_eff_rand, se_taken_rand, t_beaten_rand, switches_rand),
        (reward, s_eff, se_taken, t_beaten, switch),
    ):
        history.append(value)

In [49]:
# Reward: mean, then (population) standard deviation.
print(mean(reward_rand))
print(np.std(reward_rand))
# Remaining metrics: mean only, in the order they were collected.
for metric in (super_eff_rand, se_taken_rand, t_beaten_rand, switches_rand):
    print(mean(metric))

20.58
9.40806037395594
3.057
2.256
0.207
4.081


## Trained A3C Model:

In [17]:
# Per-episode metric histories for the trained A3C model, one list per metric.
reward_a3c, super_eff_a3c, se_taken_a3c, t_beaten_a3c, switches_a3c = (
    [], [], [], [], []
)

for i in range(1000):
    run_one_episode_a3c(extr_env, a3c_trained, False)
    reward, s_eff, se_taken, t_beaten, switch, _ = extr_env.game.get_metrics()
    # Append each value returned by get_metrics() to its matching history;
    # the sixth value is not recorded.
    for history, value in zip(
        (reward_a3c, super_eff_a3c, se_taken_a3c, t_beaten_a3c, switches_a3c),
        (reward, s_eff, se_taken, t_beaten, switch),
    ):
        history.append(value)

In [44]:
# Reward: mean, then (population) standard deviation.
print(mean(reward_a3c))
print(np.std(reward_a3c))
# Remaining metrics: mean only, in the order they were collected.
for metric in (super_eff_a3c, se_taken_a3c, t_beaten_a3c, switches_a3c):
    print(mean(metric))

344.562
170.1937429989716
117.892
44.459
24.98
17.966


## Trained NEAT Model:

In [76]:
# Per-episode metric histories for the trained NEAT model, one list per metric.
reward_neat, super_eff_neat, se_taken_neat, t_beaten_neat, switches_neat = (
    [], [], [], [], []
)

for i in range(1000):
    run_one_episode_neat(extr_env, neat_model, False)
    reward, s_eff, se_taken, t_beaten, switch, _ = extr_env.game.get_metrics()
    # Append each value returned by get_metrics() to its matching history;
    # the sixth value is not recorded.
    for history, value in zip(
        (reward_neat, super_eff_neat, se_taken_neat, t_beaten_neat, switches_neat),
        (reward, s_eff, se_taken, t_beaten, switch),
    ):
        history.append(value)

In [77]:
# Reward: mean, then (population) standard deviation.
print(mean(reward_neat))
print(np.std(reward_neat))
# Remaining metrics: mean only, in the order they were collected.
for metric in (super_eff_neat, se_taken_neat, t_beaten_neat, switches_neat):
    print(mean(metric))

61.901
54.31558891331291
7.926
6.69
3.277
0
