In [20]:
import numpy as np
import networkx as nx
from itertools import product
import random
import matplotlib.pyplot as plt

In [21]:
# Candidate genetic code: 21 blocks of codons (twenty 3-codon blocks and one
# 4-codon block) that together contain 64 codon entries.
# NOTE(review): the name suggests "Table 5a" of a reference paper -- confirm source.
partition_table5a = [
    block.split()
    for block in (
        "UUU UUC UUA",
        "UUG CUG AUG",
        "CUU CUC CUA",
        "AUU AUC AUA",
        "GUU GUC GUA",
        "GUG GCG GAG GGG",
        "UCU UCC UCA",
        "UCG CCG ACG",
        "CCU CCC CCA",
        "ACU ACC ACA",
        "GCU GCC GCA",
        "UAU UAC UAA",
        "UAG CAG AAG",
        "CAU CAC CAA",
        "AAU AAC AAA",
        "GAU GAC GAA",
        "UGU UGC UGA",
        "UGG CGG AGG",
        "CGU CGC CGA",
        "AGU AGC AGA",
        "GGU GGC GGA",
    )
]

In [22]:
# Number of distinct codons in the table; 64 means every codon appears at least once.
print(len({codon for block in partition_table5a for codon in block}))

64


In [23]:
# Every word of `length` letters over `alphabet`, in itertools.product order
# (i.e. 'AAA', 'AAC', ..., 'UUU'). The position of a codon in `nodes` is the
# index used by the flat partition encoding.
length = 3
alphabet = ['A', 'C', 'G', 'U']
nodes = list(map(''.join, product(alphabet, repeat=length)))

In [24]:
# swap_index[position][lower][upper] -> slot in an 18-entry weight table.
#   position : which codon position differs (0, 1 or 2)
#   lower/upper : ranks of the two exchanged letters in the order U=0, C=1, A=2, G=3,
#                 always with lower < upper
# Each position owns six consecutive slots, laid out as
#   U-C, U-A, U-G, C-A, C-G, A-G
# so position 0 uses slots 0-5, position 1 uses 6-11 and position 2 uses 12-17.
swap_index = {
    position: {
        lower: {
            upper: 6 * position + row_start + (upper - lower - 1)
            for upper in range(lower + 1, 4)
        }
        # row_start is the offset of the first pair that begins with `lower`
        for lower, row_start in ((0, 0), (1, 3), (2, 5))
    }
    for position in range(3)
}

# Uniform weighting: every single-letter substitution has weight 1.
table_ones = np.full(18, 1.0)

# Positions 0 and 1 keep weight 1; at position 2 the U-C and A-G exchanges get
# weight 4 and the remaining four exchanges get weight 2.
table_altered = [1] * 12 + [4, 2, 2, 2, 2, 4]

In [25]:
def weights(s1, s2, table):
    """Look up the weight of the single-letter substitution turning s1 into s2.

    Args:
        s1, s2: codon strings, compared position by position.
        table: indexable collection of weights addressed through ``swap_index``.

    Returns:
        ``table[...]`` for the differing position and letter pair when the two
        codons differ in exactly one position; ``None`` otherwise.
    """
    mismatches = 0
    for left, right in zip(s1, s2):
        if left != right:
            mismatches += 1
    if mismatches != 1:
        return None

    # Locate the differing position (the third one is assumed when the first
    # two agree).
    if s1[0] != s2[0]:
        base = 0
    elif s1[1] != s2[1]:
        base = 1
    else:
        base = 2

    # Letters are ranked U < C < A < G; swap_index is keyed by (lower, upper) rank.
    order = ('U', 'C', 'A', 'G')
    rank1 = order.index(s1[base])
    rank2 = order.index(s2[base])
    lower, upper = sorted((rank1, rank2))
    return table[swap_index[base][lower][upper]]

In [26]:
def conductance(S, G):
    """Weighted conductance of the node collection ``S`` in graph ``G``.

    Every edge of ``G`` carrying a ``"weight"`` attribute is visited once:
      * both endpoints in ``S``  -> adds twice its weight to the denominator;
      * otherwise                -> adds its weight to numerator and denominator.

    Returns:
        numerator / denominator. Raises ZeroDivisionError when the
        denominator is zero (e.g. ``G`` has no weighted edges).
    """
    cut_weight = 0
    total_weight = 0
    weighted_edges = nx.get_edge_attributes(G, "weight")
    for (u, v), weight in weighted_edges.items():
        is_internal = u in S and v in S
        if is_internal:
            # An internal edge contributes to the degree of both endpoints.
            total_weight += 2 * weight
        else:
            cut_weight += weight
            total_weight += weight

    return cut_weight / total_weight

In [27]:
def mean_conductance(table, code):
    """Average conductance over all blocks of a code.

    For each block ``S`` in ``code`` a graph is built that contains every edge
    between a codon of ``S`` and any other codon in ``nodes`` for which
    ``weights`` returns a truthy weight; the block's conductance is then
    computed on that graph.

    Args:
        table: weight table forwarded to ``weights``.
        code: iterable of blocks, each an iterable of codon strings.

    Returns:
        ``np.mean`` of the per-block conductances.
    """
    per_block = []
    for S in code:
        graph = nx.Graph()
        for codon in S:
            others = (candidate for candidate in nodes if candidate != codon)
            for other in others:
                weight = weights(codon, other, table)
                if weight:
                    graph.add_edge(codon, other, weight=weight)

        per_block.append(conductance(S, graph))
    return np.mean(per_block)

In [28]:
def fitness(table, code):
    """Fitness of a code: one minus its mean conductance (higher is better)."""
    average = mean_conductance(table, code)
    return 1 - average

In [29]:
# Mean conductance and fitness (= 1 - mean conductance) of table 5a under
# uniform substitution weights.
print(mean_conductance(table_ones, partition_table5a))
print(fitness(table_ones, partition_table5a))

0.7724867724867727
0.22751322751322733


In [30]:
def random_partition():
    """Draw a random assignment of 64 positions to the groups 0..20.

    Each of the 21 group labels is used at least once; the remaining 43
    positions receive labels drawn uniformly with ``random.randint``. The
    result is shuffled in place before being returned.

    Returns:
        list[int] of length 64.
    """
    mandatory = list(range(21))
    extras = [random.randint(0, 20) for _ in range(64 - 21)]
    partition = mandatory + extras
    random.shuffle(partition)
    return partition

def partition_to_codons(partition):
    """Expand a flat group assignment into 21 lists of codons.

    Args:
        partition: sequence whose i-th entry is the group (0..20) of ``nodes[i]``.

    Returns:
        list of 21 lists; list ``g`` holds the codons assigned to group ``g``,
        in ``nodes`` order.
    """
    groups = []
    for _ in range(21):
        groups.append([])
    for position, label in enumerate(partition):
        codon = nodes[position]
        groups[label].append(codon)
    return groups

In [31]:
# Baseline: mean conductance of one randomly drawn 21-group partition under
# uniform weights (the value changes on every run; no seed is set).
partition = partition_to_codons(random_partition())
print(mean_conductance(table_ones, partition))

0.9762912572436381


In [32]:
def initialize_population(pop_size=100):
    """Create ``pop_size`` independent random partitions.

    Returns:
        list of flat partitions, each produced by ``random_partition()``.
    """
    population = []
    for _ in range(pop_size):
        population.append(random_partition())
    return population

In [78]:
def crossover(parent1, parent2):
    """Uniform crossover that keeps the number of distinct groups of ``parent1``.

    The child starts as a copy of ``parent1``. Each position is, with
    probability 0.5, overwritten with the gene of ``parent2``; the overwrite is
    rolled back when it would change the number of distinct group labels in
    the child (for example by emptying a group).

    The genome length and the group count are taken from ``parent1`` instead of
    being fixed to 64 and 21, so the operator works for any partition size.
    For 64-gene parents with 21 groups the result and the consumption of the
    NumPy random stream are the same as before.

    Args:
        parent1: flat partition (list or 1-D array) providing the base genome.
        parent2: flat partition of the same length providing alternative genes.

    Returns:
        A new partition of the same type as ``parent1.copy()``.

    Raises:
        ValueError: if the parents differ in length.
    """
    if len(parent1) != len(parent2):
        raise ValueError("crossover requires parents of equal length")

    n_groups = len(set(parent1))
    child = parent1.copy()
    for i in range(len(child)):
        if np.random.rand() > 0.5:
            child[i] = parent2[i]
            # Only a swap can break validity, so only then is the check needed.
            if len(set(child)) != n_groups:
                child[i] = parent1[i]
    return child

# Smoke test: cross two random partitions and show the child both as a flat
# label list and as groups of codons.
p1 = random_partition()
p2 = random_partition()

c = crossover(p1, p2)

print(c)
print(partition_to_codons(c))

[16, 16, 17, 19, 19, 20, 8, 10, 20, 13, 14, 3, 14, 5, 3, 4, 8, 10, 10, 13, 11, 17, 7, 1, 3, 20, 14, 2, 7, 20, 4, 18, 18, 5, 13, 12, 8, 15, 11, 0, 10, 8, 11, 11, 19, 17, 0, 16, 5, 2, 17, 0, 18, 11, 19, 8, 3, 9, 3, 13, 11, 19, 2, 6]
[['GCU', 'GUG', 'UAU'], ['CCU'], ['CGU', 'UAC', 'UUG'], ['AGU', 'AUG', 'CGA', 'UGA', 'UGG'], ['AUU', 'CUG'], ['AUC', 'GAC', 'UAA'], ['UUU'], ['CCG', 'CUA'], ['ACG', 'CAA', 'GCA', 'GGC', 'UCU'], ['UGC'], ['ACU', 'CAC', 'CAG', 'GGA'], ['CCA', 'GCG', 'GGG', 'GGU', 'UCC', 'UUA'], ['GAU'], ['AGC', 'CAU', 'GAG', 'UGU'], ['AGG', 'AUA', 'CGG'], ['GCC'], ['AAA', 'AAC', 'GUU'], ['AAG', 'CCC', 'GUC', 'UAG'], ['CUU', 'GAA', 'UCA'], ['AAU', 'ACA', 'GUA', 'UCG', 'UUC'], ['ACC', 'AGA', 'CGC', 'CUC']]


In [90]:
def _copy_random_gene(child, donor, n_groups):
    """Overwrite one random gene of ``child`` with a random gene of ``donor``.

    Pairs of distinct positions (i, j) are drawn until ``child[i] = donor[j]``
    leaves ``child`` with exactly ``n_groups`` distinct labels. A rejected
    attempt restores the value ``child[i]`` held before the attempt.
    """
    size = len(child)
    while True:
        i, j = np.random.choice(size, 2, replace=False)
        previous = child[i]
        child[i] = donor[j]
        if len(set(child)) == n_groups:
            return
        child[i] = previous


def mutate(parent, mutation_rate=0.4):
    """Return a mutated copy of ``parent``.

    Up to two point mutations are applied, each independently with probability
    ``mutation_rate``. A point mutation copies the label at one random position
    of ``parent`` onto another position of the child and is only accepted when
    the number of distinct labels is unchanged.

    The genome length and group count are derived from ``parent`` (previously
    hard-coded to 64 and 21). If no legal mutation exists -- fewer than two
    genes, or every gene in its own group -- the copy is returned unchanged
    instead of looping forever.

    Args:
        parent: flat partition (list or 1-D array); it is not modified.
        mutation_rate: probability of each of the two mutation attempts.

    Returns:
        A new partition of the same type as ``parent.copy()``.
    """
    child = parent.copy()
    size = len(child)
    n_groups = len(set(child))
    has_legal_move = size >= 2 and n_groups < size

    for _ in range(2):
        # The random draw is made unconditionally so that the random stream is
        # consumed the same way regardless of ``has_legal_move``.
        if np.random.rand() < mutation_rate and has_legal_move:
            _copy_random_gene(child, parent, n_groups)
    return child

In [91]:
def evolutionary_algorithm(fitness, pop_size=150, crossover_rate=0.6, mutation_rate=0.4, max_generations=1000, bias_factor=1, table=table_ones, start=None):
    """Evolve flat codon partitions towards high ``fitness``.

    Each generation the population is ranked by fitness (best first). Parents
    are drawn by rank with probability proportional to
    ``exp(-bias_factor * rank / pop_size)``. ``int(crossover_rate * pop_size)``
    children are produced by ``crossover``; the population is then filled up to
    ``pop_size`` with mutated copies of rank-selected individuals.

    Args:
        fitness: callable ``fitness(table, code)`` returning a number to maximise.
        pop_size: number of individuals per generation.
        crossover_rate: fraction of each new generation created by crossover.
        mutation_rate: per-attempt mutation probability forwarded to ``mutate``.
        max_generations: number of generations to run.
        bias_factor: selection pressure; larger values favour top-ranked parents.
        table: weight table forwarded to ``fitness``.
        start: optional individual; when given, the initial population consists
            of ``pop_size`` references to it, otherwise it is random.

    Returns:
        numpy array: the fittest individual of the final population.
        NOTE(review): there is no elitism, so this is not necessarily the best
        individual seen during the run, although the progress line reports the
        best fitness seen so far.
    """
    if start is not None:
        population = [start for _ in range(pop_size)]
    else:
        population = initialize_population(pop_size)

    best_fitness = 0

    # Rank-based selection probabilities (rank 0 = fittest).
    probabilities = np.exp(-bias_factor * np.arange(pop_size) / pop_size)
    probabilities /= np.sum(probabilities)
    n_crossover = int(crossover_rate * pop_size)

    for generations in range(max_generations):
        fitness_values = np.array([fitness(table, partition_to_codons(ind)) for ind in population])
        sorted_indices = np.argsort(fitness_values)[::-1]
        population = [population[i] for i in sorted_indices]

        new_population = []
        for _ in range(n_crossover):
            parent1_index, parent2_index = np.random.choice(pop_size, size=2, p=probabilities, replace=False)
            child = crossover(population[parent1_index], population[parent2_index])
            new_population.append(child)

        while len(new_population) < pop_size:
            parent = population[np.random.choice(pop_size, p=probabilities)]
            # Fix: forward the configured rate; previously mutate() always ran
            # with its own default, ignoring ``mutation_rate``. mutate() copies
            # its argument, so the parent is left untouched.
            new_population.append(mutate(parent, mutation_rate))

        new_best_fitness = max(fitness_values)
        if new_best_fitness > best_fitness:
            best_fitness = new_best_fitness
        print(f"{best_fitness:.5f}, {new_best_fitness:.5f}", generations, f"cond: {(1 - best_fitness):.5f}")

        population = np.array(new_population)

    # Rank the final population. Individuals are indexed one at a time so this
    # also works when the loop never ran and ``population`` is still a list.
    fitness_values = np.array([fitness(table, partition_to_codons(ind)) for ind in population])
    best_index = np.argsort(fitness_values)[::-1][0]
    best_individual = np.array(population[best_index])

    return best_individual

In [86]:
# State shared with the restart loop in the next cell: the history of best
# individuals, their fitness values, and the individual used to seed the
# first run. Re-running this cell resets the search.
best_inds = []
fitnesses = []
best_ind = random_partition()

In [95]:
# Chain 50 short runs (20 generations, population 1000) on the altered weight
# table. Each run is seeded with the individual returned by the previous run;
# the returned individual and its fitness are appended to the histories.
# The cell is not idempotent: re-running it continues from the current best_ind.
for _ in range(50):
    best_ind = evolutionary_algorithm(fitness, pop_size=1000,
        max_generations=20, bias_factor=2.0, table=table_altered,
        mutation_rate=0.7, crossover_rate=0.6,
        start=best_ind)
    best_inds.append(best_ind)
    fitnesses.append(fitness(table_altered, partition_to_codons(best_ind)))

0.41950, 0.41950 0 cond: 0.58050
0.41950, 0.41950 1 cond: 0.58050
0.41950, 0.41950 2 cond: 0.58050
0.41950, 0.41950 3 cond: 0.58050
0.41950, 0.41950 4 cond: 0.58050
0.41950, 0.41950 5 cond: 0.58050
0.41950, 0.41950 6 cond: 0.58050
0.41950, 0.41950 7 cond: 0.58050
0.41950, 0.41950 8 cond: 0.58050
0.41950, 0.41950 9 cond: 0.58050
0.41950, 0.41950 10 cond: 0.58050
0.41950, 0.41950 11 cond: 0.58050
0.42222, 0.42222 12 cond: 0.57778
0.42222, 0.42222 13 cond: 0.57778
0.42222, 0.42222 14 cond: 0.57778
0.42222, 0.42222 15 cond: 0.57778
0.42222, 0.42222 16 cond: 0.57778
0.42222, 0.42222 17 cond: 0.57778
0.42222, 0.42222 18 cond: 0.57778
0.42222, 0.42222 19 cond: 0.57778
0.43537, 0.43537 0 cond: 0.56463
0.43537, 0.43537 1 cond: 0.56463
0.43537, 0.43537 2 cond: 0.56463
0.43537, 0.43537 3 cond: 0.56463
0.43537, 0.43537 4 cond: 0.56463
0.43537, 0.43537 5 cond: 0.56463
0.43537, 0.43537 6 cond: 0.56463
0.43537, 0.43537 7 cond: 0.56463
0.43537, 0.43537 8 cond: 0.56463
0.43537, 0.43537 9 cond: 0.56463


KeyboardInterrupt: 

In [100]:
# Current best individual (flat group labels, one per codon in `nodes` order)
# and its fitness under the altered weight table.
print(best_ind, fitness(table_altered, partition_to_codons(best_ind)))

[10 10 10 10 12 12 12 12 20 20 20 20  7  6  7  6  1 13  1 13  3  4  3  4
 11 11 11 11  0  0  0  0  8  8  8  8  5  5  5  5  9  9  9  9 15 19 15 19
 18 16 18 16 14 14 14 14 17 17 17 17  2  2  2  2] 0.435374149659864


In [None]:
# The same individual, shown as 21 groups of codons.
print(partition_to_codons(best_ind))

[['CUA', 'CUC', 'CUG', 'CUU'], ['UAA', 'UAC', 'UAG', 'UAU'], ['CCA', 'CCC', 'CCG', 'CCU'], ['CAA', 'CAC', 'CAG', 'CAU'], ['AUA', 'AUU'], ['CGA', 'CGU'], ['AUC', 'AUG', 'UGA', 'UGU'], ['UGC', 'UGG'], ['GUC', 'GUG'], ['AGA', 'AGC', 'AGG', 'AGU'], ['UUA', 'UUC', 'UUG', 'UUU'], ['GCA', 'GCU'], ['GAA', 'GAC', 'GAG', 'GAU'], ['ACA', 'ACC', 'ACG', 'ACU'], ['CGC', 'CGG'], ['GGA', 'GGC', 'GGG', 'GGU'], ['GCC', 'GCG'], ['AAA', 'AAU'], ['AAC', 'AAG'], ['UCA', 'UCC', 'UCG', 'UCU'], ['GUA', 'GUU']]


In [56]:
# Second reference code: 20 blocks of two or four codons that together contain
# 64 codon entries.
# NOTE(review): the name suggests "Table 5b" of a reference paper -- confirm source.
partition_table5b = [
    block.split()
    for block in (
        "UUU UUC",
        "UUA UUG",
        "CUU CUC CUA CUG",
        "AUU AUC AUA AUG",
        "GUU GUC GUA GUG",
        "UCU UCC UCA UCG",
        "CCU CCC CCA CCG",
        "ACU ACC ACA ACG",
        "GCU GCC GCA GCG",
        "UAU UAC",
        "UAA UAG",
        "CAU CAC",
        "CAA CAG",
        "AAU AAC AAA AAG",
        "GAU GAC",
        "GAA GAG",
        "UGU UGC UGA UGG",
        "CGU CGC CGA CGG",
        "AGU AGC AGA AGG",
        "GGU GGC GGA GGG",
    )
]

In [57]:
# Number of distinct codons in the table; 64 means every codon appears at least once.
print(len({codon for block in partition_table5b for codon in block}))

64


In [58]:
# Mean conductance of table 5b under the altered weight table.
mean_conductance(table_altered, partition_table5b)

0.5428571428571429

In [59]:
# Fitness (1 - mean conductance) of table 5b under the altered weight table.
fitness(table_altered, partition_table5b)

0.4571428571428571