In [1]:
import numpy as np
import networkx as nx
from itertools import product
import random
import matplotlib.pyplot as plt

In [32]:
length = 3
alphabet = ['U', 'C', 'A', 'G']
nodes = [''.join(p) for p in product(alphabet, repeat=length)]

In [33]:
SGC = [
    ["UUU", "UUC"],  # Phenylalanine (Phe)
    ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],  # Leucine (Leu)
    ["AUU", "AUC", "AUA"],  # Isoleucine (Ile)
    ["AUG"],  # Methionine (Met) - Start codon
    ["GUU", "GUC", "GUA", "GUG"],  # Valine (Val)
    ["UCU", "UCC", "UCA", "UCG", "AGU", "AGC"],  # Serine (Ser)
    ["CCU", "CCC", "CCA", "CCG"],  # Proline (Pro)
    ["ACU", "ACC", "ACA", "ACG"],  # Threonine (Thr)
    ["GCU", "GCC", "GCA", "GCG"],  # Alanine (Ala)
    ["UAU", "UAC"],  # Tyrosine (Tyr)
    ["UAA", "UAG", "UGA"],  # Stop codons
    ["CAU", "CAC"],  # Histidine (His)
    ["CAA", "CAG"],  # Glutamine (Gln)
    ["AAU", "AAC"],  # Asparagine (Asn)
    ["AAA", "AAG"],  # Lysine (Lys)
    ["GAU", "GAC"],  # Aspartic acid (Asp)
    ["GAA", "GAG"],  # Glutamic acid (Glu)
    ["UGU", "UGC"],  # Cysteine (Cys)
    ["UGG"],  # Tryptophan (Trp)
    ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],  # Arginine (Arg)
    ["GGU", "GGC", "GGA", "GGG"],  # Glycine (Gly)
]

In [39]:
swap_index =  {
    0: {#U
        0: 
        #C    #A    #G   
        {1: 0, 2: 1, 3: 2},
        #C
        1:
        #U    #A    #G
        {0: 3, 2: 4, 3: 5},
        #A
        2:
        #U    #C    #G
        {0: 6, 1: 7, 3: 8},
        #G
        3:
        #U    #C    #A
        {0: 9, 1: 10, 2: 11},
        },
    1: {#U
        0: 
        #C    #A    #G   
        {1: 12, 2: 13, 3: 14},
        #C
        1:
        #U    #A    #G
        {0: 15, 2: 16, 3: 17},
        #A
        2:
        #U    #C    #G
        {0: 18, 1: 19, 3: 20},
        #G
        3:
        #U    #C    #A
        {0: 21, 1: 22, 2: 23},
        },
    2: {#U
        0: 
        #C    #A    #G   
        {1: 24, 2: 25, 3: 26},
        #C
        1:
        #U    #A    #G
        {0: 27, 2: 28, 3: 29},
        #A
        2:
        #U    #C    #G
        {0: 30, 1: 31, 3: 32},
        #G
        3:
        #U    #C    #A
        {0: 33, 1: 34, 2: 35},
        },
}

table_ones = np.ones(36)

table_altered = [1, 1, 1, 1, 1, 1,
                 1, 1, 1, 1, 1, 1,
                 1, 1, 1, 1, 1, 1,
                 1, 1, 1, 1, 1, 1,
                 4, 2, 2, 4, 2, 2,
                 2, 2, 4, 2, 2, 4]

In [40]:
def weights(s1, s2, table):
    if sum(a != b for a, b in zip(s1, s2)) == 1:
        if s1[0] != s2[0]:
            # base 1
            letter1 = s1[0]
            letter2 = s2[0]
            base = 0
        elif s1[1] != s2[1]:
            # base 2
            letter1 = s1[1]
            letter2 = s2[1]
            base = 1
        else:
            # base3
            letter1 = s1[2]
            letter2 = s2[2]
            base = 2
    
        l1 = ['U', 'C', 'A', 'G'].index(letter1)
        l2 = ['U', 'C', 'A', 'G'].index(letter2)
        
        return table[swap_index[base][l1][l2]]

In [41]:
def conductance(S, G):
    numerator = 0
    denominator = 0
    for edge, w in nx.get_edge_attributes(G, "weight").items():
        node1, node2 = edge
        if node1 in S and node2 in S:
            denominator += 2 * w
        else:
            numerator += w
            denominator += w

    return numerator / denominator

In [42]:
def mean_conductance(table):
    conductances = []
    for S in SGC:
        G = nx.Graph()
        for i, node1 in enumerate(S):
            for node2 in nodes:
                if node2 == node1:
                    continue
                w = weights(node1, node2, table)
                if w:
                    G.add_edge(node1, node2, weight=w)
                    
        conductances.append(conductance(S, G))

    return np.mean(conductances)

In [43]:
def fitness(table):
    return 1 - mean_conductance(table)

In [45]:
for table in table_ones, table_altered:
    print(fitness(table))

0.1887125220458553
0.3605442176870749


In [46]:
bias_factor = 1
pop_size = 10
probabilities = np.exp(-bias_factor * np.arange(pop_size) / pop_size)
probabilities /= np.sum(probabilities)
print(probabilities)
print(np.random.choice(pop_size, size=2, p=probabilities, replace=False))

[0.15054499 0.13621874 0.12325581 0.11152647 0.10091332 0.09131015
 0.08262084 0.07475843 0.06764422 0.06120702]
[2 5]


In [47]:
def initialize_population(pop_size, num_weights):
    return np.random.rand(pop_size, num_weights)

def crossover(parent1, parent2):
    crossover_point = np.random.randint(1, 36)
    child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
    child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
    return child1, child2

def mutate(weights, mutation_rate=0.4):
    index = np.random.randint(36)
    factor = np.random.uniform(1 / (1 + mutation_rate), (1 + mutation_rate))
    weights[index] *= factor
    return weights

def evolutionary_algorithm(fitness, pop_size=150, crossover_rate=0.6, mutation_rate=0.4, delta=1e-6, max_generations=1000, bias_factor=1):
    population = initialize_population(pop_size, 36)
    best_fitness = 0
    generations = 0
    
    probabilities = np.exp(-bias_factor * np.arange(pop_size) / pop_size)
    probabilities /= np.sum(probabilities)
    
    while generations < max_generations:
        fitness_values = np.array([fitness(ind) for ind in population])
        sorted_indices = np.argsort(fitness_values)[::-1]
        population = population[sorted_indices]
        new_population = []
      
        for i in range(int(crossover_rate * pop_size / 2)):
            parent1_index, parent2_index = np.random.choice(pop_size, size=2, p=probabilities, replace=False)
            parent1, parent2 = population[parent1_index], population[parent2_index]
            child1, child2 = crossover(parent1, parent2)
            new_population.extend([child1, child2])
        
        while len(new_population) < pop_size:
            new_population.append(population[np.random.choice(pop_size, p=probabilities)].copy())
        
        new_population = np.array([mutate(ind, mutation_rate) for ind in new_population])
        
        new_best_fitness = max(fitness_values)
        if new_best_fitness > best_fitness:
            best_fitness = new_best_fitness
        print(f"{best_fitness:.5f}, {new_best_fitness:.5f}", generations)
        
        population = new_population
        generations += 1
    
    fitness_values = np.array([fitness(ind) for ind in population])
    sorted_indices = np.argsort(fitness_values)[::-1]
    population = population[sorted_indices]
    best_individual = population[0]
    
    return best_individual / np.sum(best_individual) * 36  # Normalize weights

In [48]:
best_ind = evolutionary_algorithm(fitness, pop_size=50, max_generations=600)

0.25794, 0.25794 0
0.26022, 0.26022 1
0.27177, 0.27177 2
0.27390, 0.27390 3
0.27390, 0.27057 4
0.27390, 0.27015 5
0.28263, 0.28263 6
0.28367, 0.28367 7
0.28787, 0.28787 8
0.29157, 0.29157 9
0.29159, 0.29159 10
0.29955, 0.29955 11
0.31414, 0.31414 12
0.32095, 0.32095 13
0.34273, 0.34273 14
0.34273, 0.34241 15
0.34273, 0.34198 16
0.34273, 0.34194 17
0.34497, 0.34497 18
0.35059, 0.35059 19
0.37242, 0.37242 20
0.37242, 0.36994 21
0.37561, 0.37561 22
0.37561, 0.37556 23
0.38555, 0.38555 24
0.40392, 0.40392 25
0.42580, 0.42580 26
0.42580, 0.42580 27
0.42815, 0.42815 28
0.42821, 0.42821 29
0.43111, 0.43111 30
0.43111, 0.42612 31
0.43415, 0.43415 32
0.43563, 0.43563 33
0.45101, 0.45101 34
0.45218, 0.45218 35
0.45448, 0.45448 36
0.48536, 0.48536 37
0.49507, 0.49507 38
0.49514, 0.49514 39
0.49514, 0.49278 40
0.49514, 0.48649 41
0.50724, 0.50724 42
0.50724, 0.50719 43
0.53240, 0.53240 44
0.53240, 0.53136 45
0.53240, 0.51165 46
0.53290, 0.53290 47
0.54795, 0.54795 48
0.54795, 0.54269 49
0.56796, 0

In [51]:
print(best_ind)

[7.89812001e-09 1.91800795e-07 1.39236933e-07 4.32001563e-07
 8.64907763e-07 3.97315471e-07 4.88896165e-07 9.89534588e-07
 8.80148033e-07 6.96965020e-07 5.02671409e-07 1.23944707e-06
 7.11596893e-07 4.21142088e-07 4.20815338e-07 6.58554814e-07
 5.36822993e-07 1.23045457e-06 1.86345524e-07 1.48176413e-06
 4.49454182e-07 1.45058967e-07 2.57644386e-07 2.19048357e-06
 1.17640501e-07 8.42272775e-07 2.50016851e-06 2.25720612e+01
 1.71545622e-06 3.71140762e-07 6.77613037e-07 1.02525637e-06
 1.26216865e-06 2.43613052e-06 1.11962246e-06 1.34279112e+01]


In [60]:
from tabulate import tabulate

# Column headers
headers = ["", "U", "C", "A", "G"]  # Empty first header for row labels

alphabet = ["U", "C", "A", "G"]

data = []

for l1 in alphabet:
    #from 
    po1 = alphabet.index(l1)
    row = []
    for l2 in alphabet:
        #to
        if l1 == l2:
            row.append(0)
        else:
            po2 = alphabet.index(l2)
            row.append(best_ind[swap_index[2][po1][po2]])
            # print(l1, l2, best_ind[swap_index[2][po1][po2]])
    data.append(row)

# Add row labels to data
data_with_labels = [[label] + list(row) for label, row in zip(alphabet, data)]

# Print table
print(tabulate(data_with_labels, headers=headers, tablefmt="grid"))

+----+--------------+-------------+--------------+-------------+
|    |            U |           C |            A |           G |
| U  |  0           | 1.17641e-07 |  8.42273e-07 | 2.50017e-06 |
+----+--------------+-------------+--------------+-------------+
| C  | 22.5721      | 0           |  1.71546e-06 | 3.71141e-07 |
+----+--------------+-------------+--------------+-------------+
| A  |  6.77613e-07 | 1.02526e-06 |  0           | 1.26217e-06 |
+----+--------------+-------------+--------------+-------------+
| G  |  2.43613e-06 | 1.11962e-06 | 13.4279      | 0           |
+----+--------------+-------------+--------------+-------------+


In [64]:
best_ind_swapped = best_ind.copy()
best_ind_swapped[24], best_ind_swapped[27] = best_ind_swapped[27], best_ind_swapped[24]
best_ind_swapped[32], best_ind_swapped[35] = best_ind_swapped[35], best_ind_swapped[32]
print(best_ind_swapped)
print(fitness(best_ind_swapped))

from tabulate import tabulate

# Column headers
headers = ["", "U", "C", "A", "G"]  # Empty first header for row labels

alphabet = ["U", "C", "A", "G"]

data = []

for l1 in alphabet:
    #from 
    po1 = alphabet.index(l1)
    row = []
    for l2 in alphabet:
        #to
        if l1 == l2:
            row.append(0)
        else:
            po2 = alphabet.index(l2)
            row.append(best_ind_swapped[swap_index[2][po1][po2]])
            # print(l1, l2, best_ind[swap_index[2][po1][po2]])
    data.append(row)

# Add row labels to data
data_with_labels = [[label] + list(row) for label, row in zip(alphabet, data)]

# Print table
print(tabulate(data_with_labels, headers=headers, tablefmt="grid"))

[7.89812001e-09 1.91800795e-07 1.39236933e-07 4.32001563e-07
 8.64907763e-07 3.97315471e-07 4.88896165e-07 9.89534588e-07
 8.80148033e-07 6.96965020e-07 5.02671409e-07 1.23944707e-06
 7.11596893e-07 4.21142088e-07 4.20815338e-07 6.58554814e-07
 5.36822993e-07 1.23045457e-06 1.86345524e-07 1.48176413e-06
 4.49454182e-07 1.45058967e-07 2.57644386e-07 2.19048357e-06
 2.25720612e+01 8.42272775e-07 2.50016851e-06 1.17640501e-07
 1.71545622e-06 3.71140762e-07 6.77613037e-07 1.02525637e-06
 1.34279112e+01 2.43613052e-06 1.11962246e-06 1.26216865e-06]
0.18544858744881176
+----+-------------+--------------+-------------+--------------+
|    |           U |            C |           A |            G |
| U  | 0           | 22.5721      | 8.42273e-07 |  2.50017e-06 |
+----+-------------+--------------+-------------+--------------+
| C  | 1.17641e-07 |  0           | 1.71546e-06 |  3.71141e-07 |
+----+-------------+--------------+-------------+--------------+
| A  | 6.77613e-07 |  1.02526e-06 | 0   

In [69]:
test1 = np.ones(36) * 1e-7
test1[24] = 22.5721
test1[32] = 13.4279
# print(test1)
print(fitness(test1))


# Column headers
headers = ["", "U", "C", "A", "G"]  # Empty first header for row labels

alphabet = ["U", "C", "A", "G"]

data = []

for l1 in alphabet:
    #from 
    po1 = alphabet.index(l1)
    row = []
    for l2 in alphabet:
        #to
        if l1 == l2:
            row.append(0)
        else:
            po2 = alphabet.index(l2)
            row.append(test1[swap_index[2][po1][po2]])
            # print(l1, l2, best_ind[swap_index[2][po1][po2]])
    data.append(row)

# Add row labels to data
data_with_labels = [[label] + list(row) for label, row in zip(alphabet, data)]

# Print table
print(tabulate(data_with_labels, headers=headers, tablefmt="grid"))


test2 = np.ones(36) * 1e-7
test2[27] = 22.5721
test2[35] = 13.4279
# print(test2)
print(fitness(test2))


from tabulate import tabulate

# Column headers
headers = ["", "U", "C", "A", "G"]  # Empty first header for row labels

alphabet = ["U", "C", "A", "G"]

data = []

for l1 in alphabet:
    #from 
    po1 = alphabet.index(l1)
    row = []
    for l2 in alphabet:
        #to
        if l1 == l2:
            row.append(0)
        else:
            po2 = alphabet.index(l2)
            row.append(test2[swap_index[2][po1][po2]])
            # print(l1, l2, best_ind[swap_index[2][po1][po2]])
    data.append(row)

# Add row labels to data
data_with_labels = [[label] + list(row) for label, row in zip(alphabet, data)]

# Print table
print(tabulate(data_with_labels, headers=headers, tablefmt="grid"))

0.17107584128878006
+----+-------+---------+-------+---------+
|    |     U |       C |     A |       G |
| U  | 0     | 22.5721 | 1e-07 |  1e-07  |
+----+-------+---------+-------+---------+
| C  | 1e-07 |  0      | 1e-07 |  1e-07  |
+----+-------+---------+-------+---------+
| A  | 1e-07 |  1e-07  | 0     | 13.4279 |
+----+-------+---------+-------+---------+
| G  | 1e-07 |  1e-07  | 1e-07 |  0      |
+----+-------+---------+-------+---------+
0.904761866800459
+----+---------+-------+---------+-------+
|    |       U |     C |       A |     G |
| U  |  0      | 1e-07 |  1e-07  | 1e-07 |
+----+---------+-------+---------+-------+
| C  | 22.5721 | 0     |  1e-07  | 1e-07 |
+----+---------+-------+---------+-------+
| A  |  1e-07  | 1e-07 |  0      | 1e-07 |
+----+---------+-------+---------+-------+
| G  |  1e-07  | 1e-07 | 13.4279 | 0     |
+----+---------+-------+---------+-------+
