In [1]:
"""
Dependencies: gym, ale-py

pip install gym[atari,accept-rom-license]

"""

import gym
import random
import numpy as np

from copy import copy, deepcopy

from functools import cmp_to_key

import torch
from torch import nn
from torchvision import transforms

  import distutils.spawn


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
transform_obs = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),

])

def obs_to_tensor(obs):
   return torch.unsqueeze(torch.flatten(transform_obs(np.array(obs))), 0).to(device)


# Initialize weights to something random.
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

# CNN used for determining actions.
class Agent_CNN(nn.Module):
  def __init__(self):
    super().__init__() 
    self.model = nn.Sequential(
            # Block 1
                nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(7,7), stride=(1,1), padding=(3,3), bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                # Block 2
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                # Block 3
                nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3), stride=(2,2), padding=(1,1), bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                # Output
                nn.Flatten(), 
                nn.Linear(in_features=512, out_features=7, bias=True),
                nn.Softmax(),
            )
    
  def forward(self, x):
    arr = self.model(x)
    arr = arr.cpu().detach().numpy()
    action = np.argmax(arr)
    return action
  
class Agent_MLP(nn.Module):
    def __init__(self):
      super().__init__() 
      self.model = nn.Sequential(
        nn.Linear(100800, 1024),
        nn.ReLU(),     
        nn.Dropout(p=0.05),
        nn.BatchNorm1d(1024),
        nn.Linear(1024, 128),
        nn.ReLU(),     
        nn.Dropout(p=0.05),
        nn.BatchNorm1d(128),
        nn.Linear(128, 7),
        nn.ReLU(),     
        nn.Dropout(p=0.05),
        nn.BatchNorm1d(7),
        nn.Softmax()
      )

      # Manually initialize random weights
      self.model.apply(init_weights)
    
    def forward(self, x):
      arr = self.model(x)
      arr = arr.cpu().detach().numpy()
      action = np.argmax(arr)
      return action
    
    def getWeights(self):
      weight_dict = {}
      weight_dict["l1"] = self.model[0].weight.cpu().detach().numpy()
      weight_dict["l2"] = self.model[4].weight.cpu().detach().numpy()
      weight_dict["l3"] = self.model[8].weight.cpu().detach().numpy()

      return weight_dict
    
    def setWeights(self, weight_dict):

      self.model[0].weight = nn.Parameter(torch.from_numpy(weight_dict["l1"]).float().to(device))
      self.model[4].weight = nn.Parameter(torch.from_numpy(weight_dict["l2"]).float().to(device))
      self.model[8].weight = nn.Parameter(torch.from_numpy(weight_dict["l3"]).float().to(device))


In [4]:
"""
Action space is of size 7:

0 NOOP
1 FIRE
2 UP
3 RIGHT
4 LEFT
5 RIGHTFIRE
6 LEFTFIRE
"""

"""
A class for each chromosome.
"""
class Chromosome:

    """
    This is used to initialize a chromosome

    args is used since Python doesn't support multiple constructors. (self, env) initializes a chromosome with a random
    action sequence and is used during population initialization.

    (self, actions, env) is used during crossover to generate new chromosomes given a crossed action buffer.
    """
    def __init__(self, *args):

        if len(args) > 1:
            self.nn = Agent_MLP()
            self.nn.to(device)
            self.nn.eval()
            self.nn.setWeights(args[0])
            self.env = args[1]
            self.reward_sum = 0
            self.time_alive = 0

            return

        # Stores actions taken, rewards obtained, total reward for the run, and the number of frames that the agent has been alive for.

        self.nn = Agent_MLP()
        self.nn.to(device)
        self.nn.eval()
        
        self.action_buffer = []
        self.reward_buffer = []

        self.reward_sum = 0
        self.time_alive = 0

        # Store the provided env as a class variable.
        self.env = args[0]

        obs = self.env.reset()
        done = False

        num_lives = 4

        while(done == False):

            action = self.nn(obs_to_tensor(obs))

            obs, reward, done, info = self.env.step(action)

            self.action_buffer.append(action)
            self.reward_buffer.append(reward)

            self.time_alive = self.time_alive + 1
            self.reward_sum = self.reward_sum + reward

            if info['lives'] > num_lives:
                num_lives = info['lives']

            if info['lives'] < num_lives:
                break

    """
    This reruns the environment and performs a new run with the updated action buffer to generate new stats.
    """
    def update(self):

        # Stores updated actions and rewards taken.
        self.nn.eval()
        
        new_action_buffer = []
        new_reward_buffer = []

        counter = 0

        self.reward_sum = 0
        self.time_alive = 0

        obs = self.env.reset()

        done = False

        num_lives = 4

        while(done == False):

            # Determine if action is from supplied buffer or random.
            counter = counter + 1

            action = self.nn(obs_to_tensor(obs))

            obs, reward, done, info = self.env.step(action)

            new_action_buffer.append(action)
            new_reward_buffer.append(reward)
            self.time_alive = self.time_alive + 1
            self.reward_sum = self.reward_sum + reward

            if info['lives'] > num_lives:
                num_lives = info['lives']

            if info['lives'] < num_lives:
                break

        self.action_buffer = new_action_buffer
        self.reward_buffer = new_reward_buffer

    """
    Since the environment has a certain level of randomness, this function reruns it without saving results
    except the aggregate reward and time alive. Used to gauge how accurate a score is during fitness.
    """
    def simulate(self):

        # This is needed in case we manage to survive for more frames than the action buffer has actions, in which case we do random ones.
        counter = 0

        new_reward_sum = 0
        new_time_alive = 0

        obs = self.env.reset()

        done = False

        num_lives = 4

        while(done == False):

            # Determine if action is from supplied buffer or random.
            counter = counter + 1

            action = self.nn(obs_to_tensor(obs))

            obs, reward, done, info = self.env.step(action)

            new_time_alive = self.time_alive + 1
            new_reward_sum = self.reward_sum + reward

            if info['lives'] > num_lives:
                num_lives = info['lives']

            if info['lives'] < num_lives:
                break

        return new_reward_sum, new_time_alive

    """
    This mutates a chromosome's genes (NN weights) based on a supplied probability. If the mutation occurs, the action is replaced with a random one.
    """
    def mutate(self, mutation_rate, mutation_factor=0.01):
        weight_dict = self.nn.getWeights()
        for key, val in weight_dict.items():
            for i in range(0, len(val)):
                for j in range(0, len(val[i])):
                    if random.random() <= mutation_rate:
                        curWeight = val[i][j]
                        mutation_amount = curWeight * mutation_factor
                        if random.random() >= 0.5:
                            val[i][j] = curWeight + mutation_amount
                        else:
                            val[i][j] = curWeight - mutation_amount

            weight_dict[key] = val

        self.nn.setWeights(weight_dict)





In [5]:
"""
GA and helper methods
"""

#Comparator for a Chromosome list
def chromosome_comparator(a, b):
    if a.reward_sum > b.reward_sum:
        return -1
    elif a.reward_sum == b.reward_sum:
        if a.time_alive < b.time_alive:
            return -1
        else:
            return 1
    else:
        return 1

# Feed this to sorting function
compare_key = cmp_to_key(chromosome_comparator)

# Generates initial population
def create_initial_population(number, env):
    population = []

    for i in range(0, number):
        print(i)
        population.append(Chromosome(env))

    return population

"""
Applies a fitness function and then ranks the population in descending order of best to worst.

In this case, the primary marker of fitness is the high score. Assuming a tie, then secondary criterion is how long each agent has been alive.

Also returns the best chromosome found.
"""
def selection(population):

    # The first stage is the fitness function.
    optimal_index = 0
    longest_alive = 1000000
    max_score = 0

    for i, chrom in enumerate(population):

        if chrom.reward_sum > max_score:
            best = True
        elif (chrom.reward_sum == max_score) and chrom.time_alive < longest_alive:
            best = True
        else:
            best = False

        if best:
            longest_alive = chrom.time_alive
            max_score = chrom.reward_sum
            optimal_index = i

    # Select best chromosome.
    best_chromosome = population[optimal_index]

    # Sort chromosomes in descending order.
    population.sort(key=compare_key, reverse=True)

    # This was for an AIS test, should remain commented out.

    #for i in range(0, len(population)):
        #population[i] = best_chromosome

    # Select only the best 50% of chromosomes.
    return population[0:int(len(population)/2)], best_chromosome

"""
Performs crossover on a population and returns the new population.

Crossover is done by combining the weights of the parental nns.
"""
def crossover(population):
    crossed_population = []
    for i in range(0, len(population) - 1, 2):

        father = population[i]

        # Edge case if there's only 1 chromosome left, in which case it pairs with a random one.
        if i + 1 > len(population) - 1:
            mother = population[random.randint(0, len(population)-1)]
            
        else:
            mother = population[i+1]

        if father.reward_sum == 0 and mother.reward_sum == 0:
            crossover_probability = 0.5
        elif mother.reward_sum == 0 and father.reward_sum > 0:
            crossover_probability = 0.999
        elif father.reward_sum == 0 and mother.reward_sum > 0:
            crossover_probability = 0.001

        # Sample proportionally to the score
        else:
            crossover_probability = father.reward_sum / (father.reward_sum + mother.reward_sum)

        if crossover_probability >= 0.999:
            crossover_probability = 0.999

        # They each have 4 kids, replenishing the population.
        for j in range(0, 4):

            motherWeights = mother.nn.getWeights()
            fatherWeights = father.nn.getWeights()

            newWeights = deepcopy(fatherWeights)

            for key, val in newWeights.items():
                for k in range(0, len(val)):
                    for l in range(0, len(val[k])):
                        if random.random() >= crossover_probability:
                            val[k][l] = motherWeights[key][k][l]

                newWeights[key] = val

            crossed_population.append(Chromosome(newWeights, father.env))

        #population[i] = None
        #population[i+1] = None


    return crossed_population

"""
Mutate the population. Rate represents chance of mutation.
"""
def mutate_population(population, rate=0.01):
    for _, chrom in enumerate(population): 
        chrom.mutate(rate)

"""
Rerun the environment for all chromosomes and produce updated runs.
"""
def update_population(population):
    for i, chrom in enumerate(population):     
         chrom.update()

In [6]:
"""
GA loop
"""

env = gym.make('ALE/Assault-v5', full_action_space=False)

n_iter = 200
population_size = 4
mutation_rate = 0.01

best_alive = 0
best_score = 0
best_chromo = None

population = create_initial_population(population_size, env)

for i in range(0, n_iter):

    # Run the fitness function and determine if the best chromosome is better than the best one we've stored for all runs.
    selected_population, best_chromo_candidate = selection(population)
    longest_alive = best_chromo_candidate.time_alive
    max_score = best_chromo_candidate.reward_sum

    if best_chromo_candidate.reward_sum > best_score:
            best = True
    elif (best_chromo_candidate.reward_sum == best_score) and best_chromo_candidate.time_alive < best_alive:
            best = True
    else:
        if i == 0:
                best = True
        else:
                best = False

    if best:
           best_chromo = deepcopy(best_chromo_candidate)
           best_alive = best_chromo_candidate.time_alive
           best_score = best_chromo_candidate.reward_sum
           

    # Print stats.
    print(f"Best Chromosome for Run {i + 1} -> Longest Alive: {longest_alive}    High Score: {max_score}")

    # Cross, Mutate, Update, and prepare the new population for the next run.
    crossed_population = crossover(selected_population)
    mutate_population(crossed_population, mutation_rate)
    update_population(crossed_population)

    population = crossed_population

print(f"Best Chromosome -> Longest Alive: {best_alive}    High Score: {best_score}")

0


  input = module(input)


1
2
3
Best Chromosome for Run 1 -> Longest Alive: 129    High Score: 84.0
Best Chromosome for Run 2 -> Longest Alive: 199    High Score: 210.0
Best Chromosome for Run 3 -> Longest Alive: 1505    High Score: 336.0
Best Chromosome for Run 4 -> Longest Alive: 129    High Score: 0.0
Best Chromosome for Run 5 -> Longest Alive: 65    High Score: 0.0
Best Chromosome for Run 6 -> Longest Alive: 289    High Score: 0.0
Best Chromosome for Run 7 -> Longest Alive: 1121    High Score: 0.0
Best Chromosome for Run 8 -> Longest Alive: 1313    High Score: 0.0
Best Chromosome for Run 9 -> Longest Alive: 609    High Score: 0.0
Best Chromosome for Run 10 -> Longest Alive: 1825    High Score: 0.0
Best Chromosome for Run 11 -> Longest Alive: 993    High Score: 0.0
Best Chromosome for Run 12 -> Longest Alive: 289    High Score: 0.0
Best Chromosome for Run 13 -> Longest Alive: 961    High Score: 0.0
Best Chromosome for Run 14 -> Longest Alive: 161    High Score: 0.0
Best Chromosome for Run 15 -> Longest Alive

KeyboardInterrupt: 

In [9]:
"""
This displays the game to evaluate our best chromosome.
"""
test_env = gym.make('ALE/Assault-v5', render_mode="human", full_action_space=False)

done = False
obs = test_env.reset()

transform_obs = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),

])
#convert_tensor = transforms.ToTensor()
obs_ten = transform_obs(obs)
print(obs_ten.shape)

obs_np = np.array(obs)
obs_flat = obs_np.flatten()

print(len(obs_flat))

time_alive = 0
reward_sum = 0

action_list = best_chromo.action_buffer
limit = best_chromo.time_alive 
counter = 0

while(done == False):

    # Determine if action is from supplied buffer or random.
    counter = counter + 1

    if counter <= limit:
        action = action_list[counter-1]
    else:  
        action = random.randint(0, 6)
    
    _, reward, done, info = test_env.step(action)

    reward_sum = reward_sum + reward
    time_alive = time_alive + 1

print(f"Longest Alive: {time_alive}    High Score: {reward_sum}")

torch.Size([3, 210, 160])
100800


KeyboardInterrupt: 

In [None]:
"""
self.layers = nn.Sequential(
      nn.Linear(21, 189),
      nn.Hardswish(),     
      nn.Dropout(p=0.05),
      nn.BatchNorm1d(189),
      nn.Linear(189, 63),
      nn.Hardswish(),     
      nn.Dropout(p=0.05),
      nn.BatchNorm1d(63),
      nn.Linear(63, 9),
      nn.Hardswish(),     
      nn.Dropout(p=0.05),
      nn.BatchNorm1d(9),
      nn.Linear(9, 1),
      nn.Sigmoid()
    )
"""

# Function to instantiate the Torch MLP.
class Agent(nn.Module):
  def __init__(self):
    super().__init__() 
    self.model = nn.Sequential(
            # Block 1
                nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(7,7), stride=(1,1), padding=(3,3), bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                # Block 2
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                # Block 3
                nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3), stride=(2,2), padding=(1,1), bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                # Output
                nn.Flatten(), 
                nn.Linear(in_features=512, out_features=7, bias=True),
                nn.Softmax(),
            )
    
  def forward(self, x):
    return self.model(x)

In [5]:
fuck = [0,1,2]
print(fuck[0:1])

[0]
