In [1]:
import gym
import math
import numpy as np
from random import random

In [2]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of the scalar ``x``.

    The computation is split by sign so that ``math.exp`` is only ever called
    with a non-positive argument. The naive form ``1 / (1 + math.exp(-x))``
    raises ``OverflowError`` once ``x`` drops below roughly -709.

    Args:
        x: A real scalar (int or float).

    Returns:
        A float in the closed interval [0, 1].
    """
    # math.exp() faster than np.exp() for scalars
    if x >= 0:
        return 1 / (1 + math.exp(-x))

    # Algebraically identical form for negative x; exp(x) can only underflow to 0.0 here.
    z = math.exp(x)
    return z / (1 + z)

# todo: are disabled genes included?
def distance(individual1, individual2, c1, c2, c3):
    """Compute the compatibility distance between two genomes.

    The value returned is::

        c1 * E / N + c2 * D / N + c3 * W

    where, comparing connection genes by ``innovation_number``:

    * ``E`` is the number of unmatched genes whose innovation number is greater
      than the smaller of the two genomes' maximum innovation numbers (excess),
    * ``D`` is the number of remaining unmatched genes (disjoint),
    * ``W`` is the mean absolute weight difference of the matching genes
      (0 when there are no matching genes),
    * ``N`` is the number of connections in the larger genome.

    Every connection is counted, regardless of its ``enabled`` flag.

    Args:
        individual1, individual2: Objects exposing a ``connections`` dict whose
            values have ``innovation_number`` and ``weight`` attributes.
        c1: Coefficient applied to the excess term.
        c2: Coefficient applied to the disjoint term.
        c3: Coefficient applied to the mean weight difference term.

    Returns:
        The distance as a float; ``0.0`` when both genomes have no connections.
    """
    # Index genes by their innovation number rather than trusting the dict keys,
    # so the lookup below is consistent with the numbers being compared.
    genes1 = {gene.innovation_number: gene for gene in individual1.connections.values()}
    genes2 = {gene.innovation_number: gene for gene in individual2.connections.values()}

    N = max(len(individual1.connections), len(individual2.connections))
    if N == 0:
        # Two empty genomes are identical; also avoids dividing by zero below.
        return 0.0

    # An empty genome has no innovation range, so every gene of the other one is excess.
    lowest = float("-inf")
    max_common = min(max(genes1, default=lowest), max(genes2, default=lowest))

    weight_diffs = []
    E = 0
    D = 0

    for innovation_number in genes1.keys() | genes2.keys():
        if innovation_number in genes1 and innovation_number in genes2:
            # abs() faster than np.abs() for scalars
            weight_diffs.append(abs(genes1[innovation_number].weight - genes2[innovation_number].weight))
        elif innovation_number > max_common:
            E = E + 1
        else:
            D = D + 1

    # sum() faster than np.sum(); with no matching genes the weight term is defined as 0.
    weight_term = c3 * sum(weight_diffs) / len(weight_diffs) if weight_diffs else 0.0
    return (c1 * E) / N + (c2 * D) / N + weight_term

class Connection:  # Gene
    """Connection gene: a weighted, switchable link from one node id to another.

    The constructor stores its five arguments unchanged as attributes of the
    same names: ``innovation_number``, ``from_id``, ``to_id``, ``weight`` and
    ``enabled``.
    """

    def __init__(self, innovation_number, from_id, to_id, weight, enabled):
        self.innovation_number = innovation_number
        # Source and target node ids of the link.
        self.from_id, self.to_id = from_id, to_id
        # Link strength and on/off flag.
        self.weight, self.enabled = weight, enabled

class Node:
    """Node record holding an identifier and a value that starts at 0."""

    def __init__(self, id):
        # Keep the caller-supplied id; the value always begins at zero.
        self.id, self.value = id, 0

class Individual():  # Genome
    """A genome: a set of nodes plus the connection genes between them.

    A new individual is fully connected: one ``Node`` per input and output id,
    and one enabled ``Connection`` from every input to every output, each with
    a random weight in [-1, 1).

    Attributes:
        nodes: Dict mapping node id -> ``Node``.
        connections: Dict mapping innovation number -> ``Connection``.
        fitness: Total reward collected by the most recent ``evaluate_fitness`` call.
        input_ids: Ids of the input nodes, as passed to the constructor.
        output_ids: Ids of the output nodes, as passed to the constructor.
    """

    def __init__(self, input_ids, output_ids):
        self.nodes = {}
        self.connections = {}
        self.fitness = 0

        self.input_ids = input_ids
        self.output_ids = output_ids

        self.configure_new()

    def configure_new(self):
        """Populate ``nodes`` and ``connections`` with the initial fully connected layout."""
        for id in self.input_ids:
            self.nodes[id] = Node(id)

        for id in self.output_ids:
            self.nodes[id] = Node(id)

        # Innovation numbers are assigned 0, 1, 2, ... in (input, output) order
        # and double as the dictionary keys.
        i = 0
        for input_id in self.input_ids:
            for output_id in self.output_ids:
                self.connections[i] = Connection(i, input_id, output_id, random() * 2 - 1, True)
                i = i + 1

    def evaluate_fitness(self, env, actions, render=True):
        """Run one episode in ``env`` and store the total reward in ``self.fitness``.

        Args:
            env: Environment exposing ``reset()``, ``render()`` and a ``step(action)``
                that returns ``(observation, reward, done, info)``.
            actions: Mapping from output node id to the action passed to ``env.step``.
            render: When true (the default), ``env.render()`` is called before every
                step. Pass ``False`` to evaluate without drawing.

        The final fitness is printed when the episode ends.
        """
        phenotype = Phenotype(self.connections, self.input_ids, self.output_ids)

        # Start from zero so that evaluating the same individual again does not
        # add the new episode's reward on top of the previous one.
        self.fitness = 0

        observation = env.reset()

        while True:
            if render:
                env.render()

            output_id = phenotype.forward(observation)
            observation, reward, done, info = env.step(actions[output_id])
            self.fitness = self.fitness + reward

            if done:
                print(self.fitness)
                return
        
class Phenotype():  # neural network
    """Neural network built from a genome's enabled connections.

    Attributes:
        neurons: Dict mapping node id -> ``Neuron``. Input neurons come first,
            then neurons in the order they first appear as a connection target,
            then any output neuron that no enabled connection targets.
        input_ids: Ids of the input neurons.
        output_ids: Ids of the output neurons.
    """

    def __init__(self, connections, input_ids, output_ids):
        """Create the neurons and attach each enabled connection to its target.

        Args:
            connections: Dict whose values expose ``enabled``, ``from_id``,
                ``to_id`` and ``weight``.
            input_ids: Ids of the input neurons.
            output_ids: Ids of the output neurons.
        """
        self.neurons = {}
        self.input_ids = input_ids
        self.output_ids = output_ids

        for id in input_ids:
            self.neurons[id] = Neuron()

        for connection in connections.values():
            if not connection.enabled:
                continue

            if connection.to_id not in self.neurons:
                self.neurons[connection.to_id] = Neuron()

            self.neurons[connection.to_id].incoming.append(connection)

        # An output whose incoming connections are all disabled (or absent) would
        # otherwise be missing and make forward() fail with a KeyError. Added last
        # so the evaluation order of the other neurons is unaffected.
        for id in output_ids:
            if id not in self.neurons:
                self.neurons[id] = Neuron()

    def forward(self, inputs):
        """Feed ``inputs`` through the network and return the winning output id.

        Args:
            inputs: Iterable of values, paired positionally with ``input_ids``.

        Returns:
            The id in ``output_ids`` whose neuron has the largest value (the first
            one on ties), or ``None`` when ``output_ids`` is empty.
        """
        for id, value in zip(self.input_ids, inputs):
            self.neurons[id].value = value

        # NOTE(review): neurons are evaluated in dict insertion order, not in
        # topological order, so with hidden neurons a value from a previous call
        # may be read. Confirm before adding hidden nodes.
        for id, neuron in self.neurons.items():
            if id in self.input_ids:
                continue

            total = 0

            for connection in neuron.incoming:
                from_neuron = self.neurons[connection.from_id]
                total = total + connection.weight * from_neuron.value

            # Debug trace kept from the original: reports a weighted sum of exactly zero.
            if total == 0:
                print("nula")

            neuron.value = sigmoid(total)

        # Use this network's own output ids rather than a module-level global.
        return max(self.output_ids, key=lambda id: self.neurons[id].value, default=None)

class Neuron:
    """Single network unit: its incoming connections and its current value."""

    def __init__(self):
        # Each neuron gets its own fresh list; the value starts at zero.
        self.incoming, self.value = [], 0

class Population:
    """A collection of individuals that share the same input and output ids.

    Attributes:
        input_ids: Input node ids handed to every individual.
        output_ids: Output node ids handed to every individual.
        individuals: List of ``pop_size`` freshly constructed ``Individual`` objects.
        species: List of species; starts empty.
    """

    def __init__(self, pop_size, input_ids, output_ids):
        self.input_ids = input_ids
        self.output_ids = output_ids

        members = []
        for _ in range(pop_size):
            members.append(Individual(input_ids, output_ids))
        self.individuals = members

        self.species = []

    def evaluate_fitness(self, env, actions):
        """Ask every individual, in order, to evaluate itself in ``env``."""
        for member in self.individuals:
            member.evaluate_fitness(env, actions)

    def speciate(self, c1, c2, c3):
        """Placeholder; currently does nothing and returns ``None``."""
        # todo: implement
        return None

class Species:
    """A species: one representative plus an initially empty list of individuals."""

    def __init__(self, representative):
        # Store the given representative; members are added to the list later.
        self.representative, self.individuals = representative, []

In [3]:
# Evaluate a random initial population on CartPole.
env = gym.make('CartPole-v0')

pop_size = 10
num_iter = 1
# Coefficients of the excess, disjoint and weight terms of distance().
c1 = 1.0
c2 = 1.0
c3 = 1.0

# Four observation components in, two discrete actions out.
input_ids = [0, 1, 2, 3]
output_ids = [4, 5]

# Map each output node id to the environment action it selects.
actions = {output_ids[0]: 0, output_ids[1]: 1}

try:
    for i in range(num_iter):
        # NOTE(review): a brand-new random population is created on every
        # iteration; once selection/reproduction exists it presumably has to
        # persist across iterations. Confirm when implementing the rest.
        population = Population(pop_size, input_ids, output_ids)
        population.evaluate_fitness(env, actions)
        population.speciate(c1, c2, c3)

        # todo: rest of alg
finally:
    # env.close() releases the viewer even if evaluation raised; the older
    # env.render(close=True) spelling is not accepted by later gym releases.
    env.close()

38.0
9.0
11.0
9.0
22.0
10.0
62.0
9.0
173.0
94.0


In [4]:
# Baseline: 20 episodes of CartPole driven by uniformly sampled random actions.
env = gym.make('CartPole-v0')

try:
    for i_episode in range(20):
        observation = env.reset()

        # Cap every episode at 100 steps.
        for t in range(100):
            env.render()

            print(observation)

            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break
finally:
    # env.close() releases the viewer even if an episode raised; the older
    # env.render(close=True) spelling is not accepted by later gym releases.
    env.close()

[-0.0229579  -0.00974606 -0.04428536  0.00559824]
[-0.02315282 -0.20420584 -0.0441734   0.28398628]
[-0.02723694 -0.0084826  -0.03849367 -0.02229494]
[-0.02740659  0.18716964 -0.03893957 -0.32687024]
[-0.0236632  -0.00737691 -0.04547698 -0.04671697]
[-0.02381074  0.18836665 -0.04641132 -0.35339437]
[-0.0200434   0.38411677 -0.0534792  -0.66034339]
[-0.01236107  0.57994055 -0.06668607 -0.96937451]
[ -7.62256485e-04   7.75891286e-01  -8.60735611e-02  -1.28223848e+00]
[ 0.01475557  0.97199763 -0.11171833 -1.6005834 ]
[ 0.03419552  1.16825136 -0.14373    -1.92590573]
[ 0.05756055  1.36459327 -0.18224811 -2.25949304]
Episode finished after 12 timesteps
[-0.04984317 -0.02060189  0.02254643  0.04819827]
[-0.05025521 -0.21603976  0.0235104   0.34790866]
[-0.05457601 -0.02125996  0.03046857  0.06273112]
[-0.05500121 -0.21680521  0.0317232   0.36486923]
[-0.05933731 -0.4123633   0.03902058  0.66738383]
[-0.06758458 -0.60800556  0.05236826  0.97209297]
[-0.07974469 -0.80378967  0.07181012  1.2807