In [2]:
import torch
import torch.multiprocessing as mp
from adv_env import *
from adv_game import AdvGame
import torch.nn.functional as F
from a3c_net_2_layer import A3CNet
from a3c_net_1_layer import Net
from statistics import mean, stdev, median, mode
import seaborn as sns
import pandas as pd
from a3c_funcs import *
import pickle
import numpy as np
import neat
import random

!! N.B. Neither A3C nor NEAT is trained in this notebook.
- This is because running the training workers in parallel with multiprocessing did not work well within Jupyter Notebooks.
- If you want to re-run the training, please use the following commands from the command line / Anaconda prompt:
        - python a3c_main.py
        - python neat_main.py

# Creating the Advanced Environment:

In [3]:
def create_adv_env(damage, all_moves):
    """Build an ``Adv_env`` around a new "Ash" vs "Elite_four" ``AdvGame``.

    Args:
        damage: Passed to ``AdvGame`` as its second-to-last positional
            argument (the CPU-damage-enabled flag).
        all_moves: Passed to ``AdvGame`` as its last positional argument
            (the CPU-all-moves flag).

    Returns:
        The ``Adv_env`` instance wrapping the newly created game.
    """
    # Relative paths of the CSV files handed to AdvGame.
    csv_paths = {
        'pokemon': 'AdvEnvData/selected_poke_data.csv',
        'trainers': 'AdvEnvData/selected_trainers.csv',
        'moves': 'AdvEnvData/selected_moves.csv',
        'types': 'type_advs.csv',
    }

    return Adv_env(
        AdvGame(
            "Ash",
            "Elite_four",
            csv_paths['pokemon'],
            csv_paths['trainers'],
            csv_paths['moves'],
            csv_paths['types'],
            damage,
            all_moves,
        )
    )

In [4]:
# Instantiate the environment shared by every evaluation run below
# (flags spelled out by name so the two booleans are not ambiguous):
adv_env = create_adv_env(damage=True, all_moves=False)

# Functions to Run One Episode in Environment:

- These will be used to evaluate the random baseline and the trained models on 1,000 generated episodes each, further down in this notebook:

In [5]:
def run_one_episode_random(env, verbose):
    """Play one episode in ``env``, picking a uniformly random move each turn.

    Args:
        env: Environment exposing ``reset()``, ``move_turn(action, verbose)``
            (returning an ``(observation, reward, done)`` triple) and
            ``game.trainer_team[0].moves`` (an indexable, sized move pool).
        verbose: Forwarded unchanged to ``env.move_turn``.

    Returns:
        None. Callers read the episode's results from ``env`` afterwards.
    """
    env.reset()
    done = False
    while not done:
        # Re-read the pool every turn, exactly as before.
        move_pool = env.game.trainer_team[0].moves
        # Size the draw from the pool itself rather than a hard-coded
        # inclusive 0-4 range: a pool with fewer than five entries used to
        # raise IndexError, and entries past the fifth were never sampled.
        # For a five-entry pool this is the same draw as randint(0, 4).
        action = move_pool[random.randrange(len(move_pool))]
        # The random policy ignores the observation and reward.
        _, _, done = env.move_turn(action, verbose)

In [6]:
def run_one_episode_a3c(env, model, verbose):
    """Play one episode in ``env`` with moves chosen by an A3C ``model``.

    Args:
        env: Environment exposing ``reset()``, ``move_turn(action, verbose)``
            (returning an ``(observation, reward, done)`` triple) and
            ``game.trainer_team[0].moves``.
        model: Object whose ``choose_action`` maps the converted observation
            to an index into the current move pool.
        verbose: Forwarded unchanged to ``env.move_turn``.

    Returns:
        None.
    """
    state = env.reset()
    while True:
        # [None, :] adds a leading axis before conv_np converts the
        # observation (conv_np is not defined in this notebook; presumably it
        # comes from the a3c_funcs star import — TODO confirm).
        move_idx = model.choose_action(conv_np(state[None, :]))
        chosen_move = env.game.trainer_team[0].moves[move_idx]
        state, _reward, finished = env.move_turn(chosen_move, verbose)
        if finished:
            break

In [7]:
def run_one_episode_neat(env, model, verbose):
    """Play one episode in ``env`` with moves chosen greedily by a NEAT net.

    Args:
        env: Environment exposing ``reset()``, ``move_turn(action, verbose)``
            (returning an ``(observation, reward, done)`` triple) and
            ``game.trainer_team[0].moves``.
        model: Object whose ``activate(observation)`` returns one score per
            move index.
        verbose: Forwarded unchanged to ``env.move_turn``.

    Returns:
        None.
    """
    state = env.reset()
    episode_over = False
    while not episode_over:
        # Greedy policy: take the index of the largest network output.
        best_idx = np.argmax(model.activate(state))
        selected_move = env.game.trainer_team[0].moves[best_idx]
        state, _, episode_over = env.move_turn(selected_move, verbose)

# Defining Models:

# Trained A3C Model:

In [10]:
# Building a blank (untrained) 2 layer neural net, with the right input/output dimensions.
# A3CNet is imported from a3c_net_2_layer. adv_obs_space and adv_actions_possible
# are not defined in this notebook; presumably they come from one of the
# star imports (adv_env / a3c_funcs) — TODO confirm.
a3c_trained = A3CNet(adv_obs_space, adv_actions_possible)

In [11]:
# Loading the best saved model weights.
# map_location='cpu' maps every stored tensor onto the CPU, so the checkpoint
# also loads on a machine without a GPU even if it was saved from one; for a
# checkpoint that was saved on the CPU this changes nothing.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
model_state_dict = torch.load('Models/a3c_advanced_best.pt', map_location='cpu')

In [12]:
# Loading the saved weights onto the blank neural net.
# load_state_dict is the last expression of the cell, so its return value is
# what the notebook displays as the cell output.
a3c_trained.load_state_dict(model_state_dict)

<All keys matched successfully>

In [16]:
# Un-comment this cell to see one episode run with a trained A3C model playing the game:
#run_one_episode_a3c(adv_env, a3c_trained, True)

# Trained NEAT Model:

In [17]:
# Build the NEAT configuration from the hyperparameter file, combined with
# neat-python's default genome, reproduction, species-set and stagnation classes:
config = neat.Config(
    neat.DefaultGenome,
    neat.DefaultReproduction,
    neat.DefaultSpeciesSet,
    neat.DefaultStagnation,
    'neat_config_adv.txt',
)

In [18]:
# Loading the genomes saved in the best found model.
# The file is opened in a `with` block so the handle is closed as soon as the
# object has been read (the bare open() call previously left it open).
# NOTE: pickle.load can execute arbitrary code while unpickling — only load
# model files that come from a trusted source.
with open('Models/neat_advanced_best.pkl', 'rb') as genome_file:
    genomes = pickle.load(genome_file)

In [19]:
# Using the saved genomes and config to load the model:
# FeedForwardNetwork.create builds the network object from the unpickled
# `genomes` object and the `config` loaded above; the result exposes the
# activate() method used by run_one_episode_neat.
neat_model = neat.nn.FeedForwardNetwork.create(genomes, config)

In [22]:
# Un-comment this cell to see one episode run with a trained NEAT model playing the game:
#run_one_episode_neat(adv_env, neat_model, True)

# Comparing 1000 Runs:

## Random Model:

In [13]:
# Per-episode metric histories for the random baseline.
reward_rand, super_eff_rand, hp_potions_rand = [], [], []

for i in range(1000):
    run_one_episode_random(adv_env, False)
    # get_metrics() is unpacked into six values; only the 2nd, 4th and 5th are kept.
    _, s_eff, _, hp, reward, _ = adv_env.game.get_metrics()
    super_eff_rand.append(s_eff)
    hp_potions_rand.append(hp)
    reward_rand.append(reward)

In [45]:
# One value per line: mean reward, reward std (np.std, i.e. population std),
# mean super-effective count, mean HP-potion count.
print(
    mean(reward_rand),
    np.std(reward_rand),
    mean(super_eff_rand),
    mean(hp_potions_rand),
    sep="\n",
)

11.2
5.3829360018488055
2.455
1.775


## A3C Best Model:

In [103]:
# Per-episode metric histories for the trained A3C model.
reward_a3c, super_eff_a3c, hp_potions_a3c = [], [], []

for i in range(1000):
    run_one_episode_a3c(adv_env, a3c_trained, False)
    # get_metrics() is unpacked into six values; only the 2nd, 4th and 5th are kept.
    _, s_eff, _, hp, reward, _ = adv_env.game.get_metrics()
    super_eff_a3c.append(s_eff)
    hp_potions_a3c.append(hp)
    reward_a3c.append(reward)

In [46]:
# One value per line: mean reward, reward std (np.std, i.e. population std),
# mean super-effective count, mean HP-potion count.
print(
    mean(reward_a3c),
    np.std(reward_a3c),
    mean(super_eff_a3c),
    mean(hp_potions_a3c),
    sep="\n",
)

440.55
157.72113840573178
292.294
71.316


## NEAT Best Model:

In [100]:
# Per-episode metric histories for the trained NEAT model.
reward_neat, super_eff_neat, hp_potions_neat = [], [], []

for i in range(1000):
    run_one_episode_neat(adv_env, neat_model, False)
    # get_metrics() is unpacked into six values; only the 2nd, 4th and 5th are kept.
    _, s_eff, _, hp, reward, _ = adv_env.game.get_metrics()
    super_eff_neat.append(s_eff)
    hp_potions_neat.append(hp)
    reward_neat.append(reward)

In [94]:
# One value per line: mean reward, reward std (np.std, i.e. population std),
# mean super-effective count, mean HP-potion count.
print(
    mean(reward_neat),
    np.std(reward_neat),
    mean(super_eff_neat),
    mean(hp_potions_neat),
    sep="\n",
)

308.09
204.0628871206129
65.59
50.33
