In [1]:
from random import random, sample, choice
from math import floor
from tqdm import tqdm
from numpy import array, dot, mean
from numpy.linalg import pinv

In [2]:
def generate_data():
    coeff = [0.4, -0.3, 0.2, -0.1]
    x = [[random() for j in range(len(coeff))] for i in range(1000)]
    y = [dot(i, coeff) for i in x]
    return array(x), array(y)

In [3]:
def multiple_linear_regression(inputs, outputs):
    X, Y = array(inputs), array(outputs)
    X_t, Y_t = X.transpose(), Y.transpose()
    coeff = dot((pinv((dot(X_t, X)))), (dot(X_t, Y)))
    Y_p = dot(X, coeff)
    Y_mean = mean(Y)
    SST = array([(i - Y_mean) ** 2 for i in Y]).sum()
    SSR = array([(i - j) ** 2 for i, j in zip(Y, Y_p)]).sum()
    COD = (1 - (SSR / SST)) * 100.0
    av_error = (SSR / len(Y))
    return {'COD': COD, 'coeff': coeff, 'error': av_error}

In [4]:
def check_termination_condition(best_individual):
    if ((best_individual['COD'] >= 99.9)
            or (generation_count == max_generations)):
        return True
    else:
        return False

In [5]:
def create_individual(individual_size):
    return [random() for i in range(individual_size)]

In [6]:
def create_population(individual_size, population_size):
    return [create_individual(individual_size) for i in range(population_size)]

In [7]:
def get_fitness(individual, inputs):
    predicted_outputs = dot(array(inputs), array(individual))
    output_mean = mean(outputs)
    SST = array(
        [(i - output_mean) ** 2 for i in outputs]
        ).sum()
    SSR = array(
        [(i - j) ** 2 for i, j in zip(outputs, predicted_outputs)]
        ).sum()
    COD = (1 - (SSR / SST)) * 100.0
    average_error = (SSR / len(outputs))
    return {'COD': COD, 'error': average_error, 'coeff': individual}

In [8]:
def evaluate_population(population):
    fitness_list = [get_fitness(individual, inputs)
                    for individual in tqdm(population)]
    error_list = sorted(fitness_list, key=lambda i: i['error'])
    best_individuals = error_list[: selection_size]
    best_individuals_stash.append(best_individuals[0]['coeff'])
    print('Error: ', best_individuals[0]['error'],
          'COD: ', best_individuals[0]['COD'])
    return best_individuals

In [9]:
def crossover(parent_1, parent_2):
    child = {}
    loci = [i for i in range(0, individual_size)]
    loci_1 = sample(loci, floor(0.5*(individual_size)))
    loci_2 = [i for i in loci if i not in loci_1]
    chromosome_1 = [[i, parent_1['coeff'][i]] for i in loci_1]
    chromosome_2 = [[i, parent_2['coeff'][i]] for i in loci_2]
    child.update({key: value for (key, value) in chromosome_1})
    child.update({key: value for (key, value) in chromosome_2})
    return [child[i] for i in loci]

In [10]:
def mutate(individual):
    loci = [i for i in range(0, individual_size)]
    no_of_genes_mutated = floor(probability_of_gene_mutating*individual_size)
    loci_to_mutate = sample(loci, no_of_genes_mutated)
    for locus in loci_to_mutate:
        gene_transform = choice([-1, 1])
        change = gene_transform*random()
        individual[locus] = individual[locus] + change
    return individual

In [11]:
def get_new_generation(selected_individuals):
    parent_pairs = [sample(selected_individuals, 2)
                    for i in range(population_size)]
    offspring = [crossover(pair[0], pair[1]) for pair in parent_pairs]
    offspring_indices = [i for i in range(population_size)]
    offspring_to_mutate = sample(
        offspring_indices,
        floor(probability_of_individual_mutating*population_size)
    )
    mutated_offspring = [[i, mutate(offspring[i])]
                         for i in offspring_to_mutate]
    for child in mutated_offspring:
        offspring[child[0]] = child[1]
    return offspring

In [12]:
inputs, outputs = generate_data()
individual_size = len(inputs[0])
population_size = 1000
selection_size = floor(0.1*population_size)
max_generations = 100
probability_of_individual_mutating = 0.1
probability_of_gene_mutating = 0.25
best_possible = multiple_linear_regression(inputs, outputs)
best_individuals_stash = [create_individual(individual_size)]
initial_population = create_population(individual_size, 1000)
current_population = initial_population
termination = False
generation_count = 0
while termination is False:
    current_best_individual = get_fitness(best_individuals_stash[-1], inputs)
    print('Generation: ', generation_count)
    best_individuals = evaluate_population(current_population)
    current_population = get_new_generation(best_individuals)
    termination = check_termination_condition(current_best_individual)
    generation_count += 1
else:
    print(get_fitness(best_individuals_stash[-1], inputs))

Generation:  0


100%|██████████| 1000/1000 [00:00<00:00, 1445.10it/s]


Error:  0.04048370153749131 COD:  -68.36714437115458
Generation:  1


100%|██████████| 1000/1000 [00:00<00:00, 1458.74it/s]


Error:  0.0143413513465157 COD:  40.355938786868904
Generation:  2


100%|██████████| 1000/1000 [00:00<00:00, 1376.60it/s]


Error:  0.009146950421345867 COD:  61.95886582356307
Generation:  3


100%|██████████| 1000/1000 [00:00<00:00, 1492.00it/s]


Error:  0.0054499044667081584 COD:  77.33446257858877
Generation:  4


100%|██████████| 1000/1000 [00:00<00:00, 1489.74it/s]


Error:  0.002781077003412127 COD:  88.43381470671213
Generation:  5


100%|██████████| 1000/1000 [00:00<00:00, 1438.92it/s]


Error:  0.0016704473850247476 COD:  93.05279790736478
Generation:  6


100%|██████████| 1000/1000 [00:00<00:00, 1017.72it/s]


Error:  0.001454582636976184 COD:  93.95055502489707
Generation:  7


100%|██████████| 1000/1000 [00:00<00:00, 1300.54it/s]


Error:  0.0011795053930510296 COD:  95.09457022810854
Generation:  8


100%|██████████| 1000/1000 [00:00<00:00, 1258.34it/s]


Error:  0.0008387424930966697 COD:  96.51176465929998
Generation:  9


100%|██████████| 1000/1000 [00:00<00:00, 1267.19it/s]


Error:  0.0003424832434572876 COD:  98.57565085439448
Generation:  10


100%|██████████| 1000/1000 [00:01<00:00, 803.12it/s]


Error:  0.0001736061020413909 COD:  99.27799182050961
Generation:  11


100%|██████████| 1000/1000 [00:00<00:00, 1106.50it/s]


Error:  0.0001736061020413909 COD:  99.27799182050961
Generation:  12


100%|██████████| 1000/1000 [00:01<00:00, 939.20it/s]


Error:  0.0001736061020413909 COD:  99.27799182050961
Generation:  13


100%|██████████| 1000/1000 [00:00<00:00, 1070.90it/s]


Error:  0.00017336244118130694 COD:  99.27900517851911
Generation:  14


100%|██████████| 1000/1000 [00:00<00:00, 1246.46it/s]


Error:  0.00015119516375186064 COD:  99.37119638281952
Generation:  15


100%|██████████| 1000/1000 [00:00<00:00, 1476.71it/s]


Error:  0.00014258483940889183 COD:  99.40700574971726
Generation:  16


100%|██████████| 1000/1000 [00:00<00:00, 1466.60it/s]


Error:  0.00014258483940889183 COD:  99.40700574971726
Generation:  17


100%|██████████| 1000/1000 [00:00<00:00, 1401.98it/s]


Error:  0.00013706394458728395 COD:  99.4299665280104
Generation:  18


100%|██████████| 1000/1000 [00:00<00:00, 1431.07it/s]


Error:  0.00013706394458728395 COD:  99.4299665280104
Generation:  19


100%|██████████| 1000/1000 [00:00<00:00, 1463.11it/s]


Error:  8.36083505260658e-05 COD:  99.6522823089529
Generation:  20


100%|██████████| 1000/1000 [00:00<00:00, 1404.31it/s]


Error:  8.36083505260658e-05 COD:  99.6522823089529
Generation:  21


100%|██████████| 1000/1000 [00:00<00:00, 1433.88it/s]


Error:  9.901522881020581e-05 COD:  99.58820684149664
Generation:  22


100%|██████████| 1000/1000 [00:00<00:00, 1464.66it/s]


Error:  9.901522881020581e-05 COD:  99.58820684149664
Generation:  23


100%|██████████| 1000/1000 [00:00<00:00, 1437.76it/s]


Error:  9.901522881020581e-05 COD:  99.58820684149664
Generation:  24


100%|██████████| 1000/1000 [00:00<00:00, 1347.88it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  25


100%|██████████| 1000/1000 [00:00<00:00, 1388.30it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  26


100%|██████████| 1000/1000 [00:00<00:00, 1427.24it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  27


100%|██████████| 1000/1000 [00:00<00:00, 1477.90it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  28


100%|██████████| 1000/1000 [00:00<00:00, 1357.45it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  29


100%|██████████| 1000/1000 [00:00<00:00, 1445.37it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  30


100%|██████████| 1000/1000 [00:00<00:00, 1468.38it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  31


100%|██████████| 1000/1000 [00:00<00:00, 1452.96it/s]


Error:  8.865994025694245e-05 COD:  99.6312733175509
Generation:  32


100%|██████████| 1000/1000 [00:00<00:00, 1500.08it/s]


Error:  8.742149585963993e-05 COD:  99.63642386798767
Generation:  33


100%|██████████| 1000/1000 [00:00<00:00, 1482.51it/s]


Error:  8.468039157750064e-05 COD:  99.64782381124583
Generation:  34


100%|██████████| 1000/1000 [00:00<00:00, 1454.63it/s]


Error:  8.468039157750064e-05 COD:  99.64782381124583
Generation:  35


100%|██████████| 1000/1000 [00:00<00:00, 1490.69it/s]


Error:  8.468039157750064e-05 COD:  99.64782381124583
Generation:  36


100%|██████████| 1000/1000 [00:00<00:00, 1440.06it/s]


Error:  8.468039157750064e-05 COD:  99.64782381124583
Generation:  37


100%|██████████| 1000/1000 [00:00<00:00, 1401.21it/s]


Error:  7.228934141885473e-05 COD:  99.69935678999381
Generation:  38


100%|██████████| 1000/1000 [00:00<00:00, 1406.43it/s]


Error:  7.228934141885473e-05 COD:  99.69935678999381
Generation:  39


100%|██████████| 1000/1000 [00:00<00:00, 1379.74it/s]


Error:  7.228934141885473e-05 COD:  99.69935678999381
Generation:  40


100%|██████████| 1000/1000 [00:00<00:00, 1086.64it/s]


Error:  7.228934141885473e-05 COD:  99.69935678999381
Generation:  41


100%|██████████| 1000/1000 [00:00<00:00, 1356.16it/s]


Error:  7.228934141885473e-05 COD:  99.69935678999381
Generation:  42


100%|██████████| 1000/1000 [00:00<00:00, 1423.13it/s]


Error:  7.228934141885473e-05 COD:  99.69935678999381
Generation:  43


100%|██████████| 1000/1000 [00:00<00:00, 1356.83it/s]


Error:  6.995783854758667e-05 COD:  99.70905324722524
Generation:  44


100%|██████████| 1000/1000 [00:00<00:00, 1431.64it/s]


Error:  6.995783854758667e-05 COD:  99.70905324722524
Generation:  45


100%|██████████| 1000/1000 [00:00<00:00, 1481.99it/s]


Error:  6.995783854758667e-05 COD:  99.70905324722524
Generation:  46


100%|██████████| 1000/1000 [00:00<00:00, 1443.82it/s]


Error:  6.543548431746711e-05 COD:  99.72786120792654
Generation:  47


100%|██████████| 1000/1000 [00:00<00:00, 1376.37it/s]


Error:  5.8994383784649884e-05 COD:  99.75464901788787
Generation:  48


100%|██████████| 1000/1000 [00:00<00:00, 1491.26it/s]


Error:  5.8994383784649884e-05 COD:  99.75464901788787
Generation:  49


100%|██████████| 1000/1000 [00:00<00:00, 1428.04it/s]


Error:  5.8994383784649884e-05 COD:  99.75464901788787
Generation:  50


100%|██████████| 1000/1000 [00:00<00:00, 1417.74it/s]


Error:  5.573673853043372e-05 COD:  99.76819719673507
Generation:  51


100%|██████████| 1000/1000 [00:00<00:00, 1332.23it/s]


Error:  3.080420255444463e-05 COD:  99.87188879915243
Generation:  52


100%|██████████| 1000/1000 [00:00<00:00, 1473.78it/s]


Error:  3.080420255444463e-05 COD:  99.87188879915243
Generation:  53


100%|██████████| 1000/1000 [00:00<00:00, 1220.32it/s]


Error:  3.040109466674547e-05 COD:  99.87356527934935
Generation:  54


100%|██████████| 1000/1000 [00:00<00:00, 1331.79it/s]


Error:  2.5734329282532317e-05 COD:  99.89297382973751
Generation:  55


100%|██████████| 1000/1000 [00:00<00:00, 1418.15it/s]


Error:  2.5734329282532317e-05 COD:  99.89297382973751
Generation:  56


100%|██████████| 1000/1000 [00:00<00:00, 1405.61it/s]


Error:  2.9023275821172424e-05 COD:  99.8792954723821
Generation:  57


100%|██████████| 1000/1000 [00:00<00:00, 1408.46it/s]


Error:  2.2015219260158717e-05 COD:  99.90844118845939
Generation:  58


100%|██████████| 1000/1000 [00:00<00:00, 1344.62it/s]

Error:  2.2015219260158717e-05 COD:  99.90844118845939
{'COD': 99.90844118845939, 'error': 2.2015219260158717e-05, 'coeff': [0.3946665468631425, -0.3002746019939052, 0.20148094206452516, -0.08904702152044475]}



