In [1]:
import gym
import math
import numpy as np
from random import random

In [8]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x) of the scalar ``x``.

    The computation is split by sign so the exponential is always taken of a
    non-positive number.  The naive form ``1 / (1 + math.exp(-x))`` raises
    ``OverflowError`` once ``-x`` exceeds roughly 709; here very negative
    inputs simply underflow to ``0.0`` instead.
    """
    # math.exp() faster than np.exp() for scalars
    if x >= 0:
        return 1 / (1 + math.exp(-x))

    # Algebraically identical to the branch above, but exp(x) cannot overflow
    # when x is negative.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

# todo: are disabled genes included?
def distance(individual1, individual2, c1, c2, c3):
    """Compute a compatibility distance between two genomes.

    The result is ``c1 * E / N + c2 * D / N + c3 * W`` where

    * a gene is *matching* when its innovation number occurs in both genomes,
    * ``E`` counts non-matching genes whose innovation number is greater than
      the smaller of the two genomes' highest innovation numbers,
    * ``D`` counts the remaining non-matching genes,
    * ``N`` is the connection count of the larger genome,
    * ``W`` is the mean absolute weight difference of the matching genes
      (``0.0`` when there are none).

    Parameters
    ----------
    individual1, individual2 : objects with a ``connections`` dict whose
        values expose ``innovation_number`` and ``weight``.
    c1, c2, c3 : numeric coefficients for the three terms.

    Returns
    -------
    float
        The distance; ``0.0`` when both genomes have no connections.
    """
    # Index genes by their innovation number instead of relying on the dict
    # keys happening to equal the innovation numbers.
    genes1 = {gene.innovation_number: gene for gene in individual1.connections.values()}
    genes2 = {gene.innovation_number: gene for gene in individual2.connections.values()}

    N = max(len(individual1.connections), len(individual2.connections))
    if N == 0:
        # Two empty genomes: nothing to compare and nothing to divide by.
        return 0.0

    # An empty genome contributes -inf, so every gene of the other genome is
    # counted as excess.
    lowest = float("-inf")
    max_common = min(max(genes1, default=lowest), max(genes2, default=lowest))

    weight_diffs = []
    E = 0
    D = 0

    for innovation_number in genes1.keys() | genes2.keys():
        if innovation_number in genes1 and innovation_number in genes2:
            # abs() faster than np.abs() for scalars
            weight_diffs.append(abs(genes1[innovation_number].weight - genes2[innovation_number].weight))
        elif innovation_number > max_common:
            E = E + 1
        else:
            D = D + 1

    # sum() faster than np.sum(); guard against genomes without matching genes
    mean_weight_diff = sum(weight_diffs) / len(weight_diffs) if weight_diffs else 0.0

    return (c1 * E) / N + (c2 * D) / N + c3 * mean_weight_diff

class Connection:  # Gene
    """Plain record describing one weighted link between two node ids.

    Attributes are stored exactly as passed in: ``innovation_number``,
    ``from_id``, ``to_id``, ``weight`` and ``enabled``.
    """

    def __init__(self, innovation_number, from_id, to_id, weight, enabled):
        # Identity and endpoints of the link.
        self.innovation_number, self.from_id, self.to_id = innovation_number, from_id, to_id
        # Strength of the link and whether it is switched on.
        self.weight, self.enabled = weight, enabled

class Node:
    """A genome node identified by ``id``; its ``value`` starts at 0."""

    def __init__(self, id):
        self.id, self.value = id, 0

class Individual(): # Genome
    """A genome: a dict of nodes, a dict of connections and a fitness score.

    On construction every input node is connected to every output node with
    an enabled connection carrying a random weight.
    """

    def __init__(self, input_ids, output_ids):
        """Store the input/output node ids and build the initial genome."""
        self.nodes = {}
        self.connections = {}
        self.fitness = 0

        self.input_ids = input_ids
        self.output_ids = output_ids

        self.configure_new()

    def configure_new(self):
        """Create one node per input/output id and fully connect inputs to outputs.

        Connections are stored under a running counter that is also used as
        their innovation number; weights are drawn from ``random() * 2 - 1``,
        i.e. the interval [-1, 1).
        """
        for node_id in self.input_ids:
            self.nodes[node_id] = Node(node_id)

        for node_id in self.output_ids:
            self.nodes[node_id] = Node(node_id)

        innovation_number = 0
        for input_id in self.input_ids:
            for output_id in self.output_ids:
                self.connections[innovation_number] = Connection(
                    innovation_number, input_id, output_id, random() * 2 - 1, True
                )
                innovation_number = innovation_number + 1

    def evaluate_fitness(self, env, actions):
        """Run one rendered episode in ``env`` and store the summed reward.

        Parameters
        ----------
        env : environment object providing ``reset()``, ``render()`` and
            ``step(action)`` returning ``(observation, reward, done, info)``.
        actions : mapping from an output node id to the action passed to
            ``env.step``.

        ``self.fitness`` is reset before the episode so that evaluating the
        same individual again yields the score of the latest episode rather
        than a running total over all evaluations.  The final fitness is
        printed when the episode ends.
        """
        phenotype = Phenotype(self.connections, self.input_ids, self.output_ids)

        # Start from a clean score; without this, repeated evaluations add up.
        self.fitness = 0

        observation = env.reset()

        while True:
            env.render()

            output_id = phenotype.forward(observation)
            observation, reward, done, info = env.step(actions[output_id])
            self.fitness = self.fitness + reward

            if done:
                print(self.fitness)
                return
        
class Phenotype(): # neural network
    """Executable network built from the enabled connections of a genome.

    ``neurons`` maps a node id to a ``Neuron`` holding its incoming
    connections and its most recently computed value.
    """

    def __init__(self, connections, input_ids, output_ids):
        """Create neurons for inputs, connection targets and outputs.

        Parameters
        ----------
        connections : dict whose values expose ``enabled``, ``from_id``,
            ``to_id`` and ``weight``; disabled connections are ignored.
        input_ids, output_ids : iterables of node ids.
        """
        self.neurons = {}
        self.input_ids = input_ids
        self.output_ids = output_ids

        for node_id in input_ids:
            self.neurons[node_id] = Neuron()

        for connection in connections.values():
            if not connection.enabled:
                continue

            if connection.to_id not in self.neurons:
                self.neurons[connection.to_id] = Neuron()

            self.neurons[connection.to_id].incoming.append(connection)

        # An output whose incoming connections are all disabled (or missing)
        # would otherwise have no neuron and forward() would fail with KeyError.
        for node_id in output_ids:
            if node_id not in self.neurons:
                self.neurons[node_id] = Neuron()

    def forward(self, inputs):
        """Feed ``inputs`` through the network and return the winning output id.

        Input values are assigned to the input neurons in order.  Every other
        neuron is then visited once, in the insertion order of
        ``self.neurons``, and set to the sigmoid of the weighted sum of the
        current values of its source neurons.

        Returns the id from ``self.output_ids`` with the largest value (the
        first one on ties), or ``None`` when there are no output ids.
        """
        for node_id, value in zip(self.input_ids, inputs):
            self.neurons[node_id].value = value

        for node_id, neuron in self.neurons.items():
            if node_id in self.input_ids:
                continue

            weighted_sum = 0

            for connection in neuron.incoming:
                from_neuron = self.neurons[connection.from_id]
                weighted_sum = weighted_sum + connection.weight * from_neuron.value

            neuron.value = sigmoid(weighted_sum)

        # Use this instance's own outputs, not a global that happens to exist
        # in the notebook namespace.
        return max(
            self.output_ids,
            key=lambda node_id: self.neurons[node_id].value,
            default=None,
        )

class Neuron:
    """Network unit with a list of incoming connections and a value.

    A new neuron has no incoming connections and a value of 0.
    """

    def __init__(self):
        self.incoming, self.value = [], 0

class Population():
    """A fixed-size collection of individuals plus a list of species."""

    def __init__(self, pop_size, input_ids, output_ids):
        """Create ``pop_size`` fresh individuals sharing the same node ids."""
        self.input_ids = input_ids
        self.output_ids = output_ids
        self.individuals = [Individual(input_ids, output_ids) for _ in range(pop_size)]
        self.species = []

    def evaluate_fitness(self, env, actions):
        """Evaluate every individual, in order, by calling its ``evaluate_fitness``."""
        for individual in self.individuals:
            individual.evaluate_fitness(env, actions)

    def speciate(self, c1, c2, c3):
        """Placeholder for grouping individuals into species.

        Not implemented yet: the call does nothing and leaves ``self.species``
        untouched.  ``c1``, ``c2`` and ``c3`` are accepted but currently unused.
        """
        # todo: implement
        # A comment alone is not a valid function body; `pass` keeps the
        # class importable until the real implementation is written.
        pass

class Species:
    """Holds a representative and an initially empty list of individuals."""

    def __init__(self, representative):
        self.representative, self.individuals = representative, []

In [9]:
# Driver cell: build a population, evaluate it on CartPole and speciate it.
env = gym.make('CartPole-v0')

pop_size = 10
num_iter = 1
# Coefficients handed to Population.speciate().
c1 = 1.0
c2 = 1.0
c3 = 1.0

input_ids = [0, 1, 2, 3]
output_ids = [4, 5]

# Map each output node id to the action index passed to env.step().
actions = {}
actions[output_ids[0]] = 0
actions[output_ids[1]] = 1

try:
    for i in range(num_iter):
        population = Population(pop_size, input_ids, output_ids)
        population.evaluate_fitness(env, actions)
        population.speciate(c1, c2, c3)

        # todo: rest of alg
finally:
    # Always release the environment (and its viewer window), even when an
    # episode raises; env.close() replaces the older env.render(close=True).
    env.close()

64.0
82.0
82.0
9.0
93.0
200.0
8.0
50.0
79.0
62.0
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
ASD
0
0
8
64.0 , 93.0 = 0.2937613956898507
200.0 , 79.0 = 0.3633378358744015
82.0 , 93.0 = 0.39294920119955845
82.0 , 79.0 = 0.40718369176713554
93.0 , 200.0 = 0.4289085097363597
93.0 , 79.0 = 0.45269968865893184
82.0 , 62.0 = 0.4540636356838888
82.0 , 50.0 = 0.468852095382112
64.0 , 82.0 = 0.4746759574973555
93.0 , 62.0 = 0.482446916725938
64.0 , 62.0 = 0.4923139810254788
9.0 , 50.0 = 0.5040753804156537
82.0 , 200.0 = 0.5042074281521169
82.0 , 9.0 = 0.5061484369127494
50.0 , 62.0 = 0.5127380489015876
93.0

In [10]:
# Baseline cell: run 20 episodes of at most 100 steps with random actions,
# printing every observation seen.
env = gym.make('CartPole-v0')

try:
    for i_episode in range(20):
        observation = env.reset()

        for t in range(100):
            env.render()

            print(observation)

            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break
finally:
    # Always release the environment (and its viewer window), even when an
    # episode raises; env.close() replaces the older env.render(close=True).
    env.close()

[ 0.02250477  0.00891653  0.00238745 -0.0119457 ]
[ 0.0226831  -0.18623958  0.00214853  0.28148954]
[ 0.01895831  0.00885165  0.00777832 -0.01051497]
[ 0.01913534  0.2038612   0.00756802 -0.30073363]
[ 0.02321256  0.0086322   0.00155335 -0.00567354]
[ 0.02338521  0.20373184  0.00143988 -0.29786596]
[ 0.02745984  0.39883323 -0.00451744 -0.59009443]
[ 0.03543651  0.59401814 -0.01631933 -0.88419693]
[ 0.04731687  0.78935784 -0.03400327 -1.18196513]
[ 0.06310403  0.9849042  -0.05764257 -1.48511015]
[ 0.08280211  1.18067949 -0.08734477 -1.79522288]
[ 0.1064157   1.37666433 -0.12324923 -2.11372618]
[ 0.13394899  1.18297044 -0.16552375 -1.86153287]
[ 0.1576084   0.99000549 -0.20275441 -1.62448363]
Episode finished after 14 timesteps
[-0.00423744 -0.03042053  0.00276961  0.0350391 ]
[-0.00484585 -0.22558209  0.00347039  0.32859458]
[-0.00935749 -0.42075327  0.01004229  0.62236989]
[-0.01777256 -0.616014    0.02248968  0.91819859]
[-0.03009284 -0.81143265  0.04085366  1.21786385]
[-0.04632149 -