# Optimal weight table

In [2]:
import numpy as np
import networkx as nx
from itertools import product
import random
import matplotlib.pyplot as plt

In [271]:
# Enumerate every word of `length` letters over the RNA alphabet; with
# length 3 these are the 64 codons, in U, C, A, G lexicographic order.
length = 3
alphabet = ['U', 'C', 'A', 'G']
nodes = list(map(''.join, product(alphabet, repeat=length)))

In [272]:
# Codon blocks of the genetic code: each inner list groups the codons that
# share one meaning (an amino acid, or the stop signal). Together the 21
# blocks list each of the 64 three-letter codons exactly once.
SGC = [
    ["UUU", "UUC"],  # Phenylalanine (Phe)
    ["UUA", "UUG", "CUU", "CUC", "CUA", "CUG"],  # Leucine (Leu)
    ["AUU", "AUC", "AUA"],  # Isoleucine (Ile)
    ["AUG"],  # Methionine (Met) - Start codon
    ["GUU", "GUC", "GUA", "GUG"],  # Valine (Val)
    ["UCU", "UCC", "UCA", "UCG", "AGU", "AGC"],  # Serine (Ser)
    ["CCU", "CCC", "CCA", "CCG"],  # Proline (Pro)
    ["ACU", "ACC", "ACA", "ACG"],  # Threonine (Thr)
    ["GCU", "GCC", "GCA", "GCG"],  # Alanine (Ala)
    ["UAU", "UAC"],  # Tyrosine (Tyr)
    ["UAA", "UAG", "UGA"],  # Stop codons
    ["CAU", "CAC"],  # Histidine (His)
    ["CAA", "CAG"],  # Glutamine (Gln)
    ["AAU", "AAC"],  # Asparagine (Asn)
    ["AAA", "AAG"],  # Lysine (Lys)
    ["GAU", "GAC"],  # Aspartic acid (Asp)
    ["GAA", "GAG"],  # Glutamic acid (Glu)
    ["UGU", "UGC"],  # Cysteine (Cys)
    ["UGG"],  # Tryptophan (Trp)
    ["CGU", "CGC", "CGA", "CGG", "AGA", "AGG"],  # Arginine (Arg)
    ["GGU", "GGC", "GGA", "GGG"],  # Glycine (Gly)
]

In [273]:
# swap_index[position][low][high] -> slot in an 18-entry weight table.
#
# * position: codon position of the substitution (0, 1 or 2).
# * low, high: indices of the two exchanged bases in the order
#   U=0, C=1, A=2, G=3, always with low < high.
#
# Each position owns six consecutive slots, ordered by base pair as
# U-C, U-A, U-G, C-A, C-G, A-G. `low * (5 - low) // 2 + high - 1` is the rank
# of the pair (low, high) in that ordering.
swap_index = {
    position: {
        low: {
            high: 6 * position + low * (5 - low) // 2 + high - 1
            for high in range(low + 1, 4)
        }
        for low in range(3)
    }
    for position in range(3)
}

# Uniform table: every single-base substitution weighs the same.
table_ones = np.ones(18)

# Table that only re-weights the third codon position.
# NOTE(review): with the U, C, A, G ordering above, the 4s fall on the U-A and
# C-G slots, whereas `weight` in this file assigns 4 to U-C and A-G — confirm
# which pairing is intended.
table_altered = (
    [1] * 6                 # first codon position
    + [1] * 6               # second codon position
    + [2, 4, 2, 2, 4, 2]    # third codon position
)

In [274]:
def weights(s1, s2, table):
    """Look up the weight of the single-base substitution turning s1 into s2.

    Args:
        s1: First codon.
        s2: Second codon.
        table: Indexable collection of weights, addressed through the
            module-level ``swap_index`` mapping.

    Returns:
        The table entry for the (position, base pair) of the substitution, or
        ``None`` when the codons do not differ in exactly one position.
    """
    if sum(x != y for x, y in zip(s1, s2)) != 1:
        return None

    # First differing position among the first two; otherwise the third.
    base = next((pos for pos in (0, 1) if s1[pos] != s2[pos]), 2)

    order = ['U', 'C', 'A', 'G']
    # swap_index is keyed with the smaller base index first.
    low, high = sorted((order.index(s1[base]), order.index(s2[base])))
    return table[swap_index[base][low][high]]

In [275]:
def conductance(S, G):
    """Return the fraction of weighted edge volume in G that leaves S.

    Every edge of G carrying a "weight" attribute is visited once. An edge
    with both endpoints in S adds twice its weight to the total; any other
    edge adds its weight to both the boundary sum and the total.

    Args:
        S: Container of nodes, tested with ``in``.
        G: networkx graph with "weight" edge attributes.

    Returns:
        Boundary weight divided by total weight.
    """
    boundary = 0
    volume = 0
    edge_weights = nx.get_edge_attributes(G, "weight")
    for (u, v), weight in edge_weights.items():
        if u in S and v in S:
            # Internal edge: counted from both of its endpoints.
            volume += 2 * weight
            continue
        boundary += weight
        volume += weight

    return boundary / volume

In [276]:
def mean_conductance(table):
    """Average the conductance of every codon block in SGC.

    For each block a graph is built that links each codon of the block to
    every other codon for which ``weights`` returns a truthy value; the
    block's conductance is then measured on that graph.

    Args:
        table: Weight table passed through to ``weights``.

    Returns:
        Mean of the per-block conductances.
    """
    per_block = []
    for block in SGC:
        graph = nx.Graph()
        for codon in block:
            others = (candidate for candidate in nodes if candidate != codon)
            for other in others:
                edge_weight = weights(codon, other, table)
                # Falsy results (no single-base link) produce no edge.
                if not edge_weight:
                    continue
                graph.add_edge(codon, other, weight=edge_weight)

        per_block.append(conductance(block, graph))

    return np.mean(per_block)

In [277]:
def fitness(table):
    """Score a weight table as one minus its mean block conductance."""
    average = mean_conductance(table)
    return 1 - average

In [279]:
# Compare the fitness of the uniform table with the hand-altered one.
for table in (table_ones, table_altered):
    print(fitness(table))

0.1887125220458553
0.28798185941043086


In [283]:
# Scratch check of the rank-based selection distribution that
# evolutionary_algorithm builds with the same formula.
bias_factor = 1
pop_size = 10
# Exponentially decaying weight per index, then normalized to sum to 1.
probabilities = np.exp(-bias_factor * np.arange(pop_size) / pop_size)
probabilities /= np.sum(probabilities)
print(probabilities)
# Draw two distinct indices according to those probabilities.
print(np.random.choice(pop_size, size=2, p=probabilities, replace=False))

[0.150544988  0.1362187383 0.1232558114 0.1115264702 0.1009133233
 0.0913101509 0.0826208412 0.0747584286 0.0676442235 0.0612070246]
[7 6]


In [284]:
def initialize_population(pop_size, num_weights):
    """Return a (pop_size, num_weights) array of uniform samples from [0, 1)."""
    shape = (pop_size, num_weights)
    return np.random.rand(*shape)

def crossover(parent1, parent2):
    """Recombine two parents with a single random cut point.

    The cut point is drawn from the parents' actual length rather than a
    fixed chromosome size, so both children always mix material from both
    parents whenever there are at least two genes.

    Args:
        parent1: First parent, a 1-D sequence of weights.
        parent2: Second parent, assumed to have the same length as parent1.

    Returns:
        Tuple ``(child1, child2)`` of new arrays: child1 takes parent1's genes
        before the cut and parent2's after it; child2 is the mirror image.
    """
    num_genes = len(parent1)
    if num_genes > 1:
        # randint's upper bound is exclusive: the cut lies in 1..num_genes-1,
        # leaving at least one gene on each side.
        crossover_point = np.random.randint(1, num_genes)
    else:
        # Nothing to split; the children are plain copies of the parents.
        crossover_point = num_genes
    child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
    child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
    return child1, child2

def mutate(weights, mutation_rate=0.4):
    """Rescale one randomly chosen weight in place.

    The gene index is drawn from the vector's actual length, so vectors of
    any non-zero size are handled and every gene can be selected.

    Args:
        weights: Mutable 1-D weight vector; modified in place.
        mutation_rate: Controls the multiplicative factor, which is drawn
            uniformly from ``[1 / (1 + mutation_rate), 1 + mutation_rate]``.

    Returns:
        The same ``weights`` object, after mutation.
    """
    index = np.random.randint(len(weights))
    factor = np.random.uniform(1 / (1 + mutation_rate), 1 + mutation_rate)
    weights[index] *= factor
    return weights

def evolutionary_algorithm(fitness, pop_size=150, crossover_rate=0.6, mutation_rate=0.4, delta=1e-6, max_generations=1000, bias_factor=1):
    """Evolve an 18-entry weight table that maximizes ``fitness``.

    Each generation ranks the population by fitness, breeds children by
    crossover from rank-biased parent pairs, fills the remainder with
    rank-biased copies, and mutates every member of the new population.
    Because no individual survives unmutated, the best individual seen so far
    is remembered explicitly and returned, so the result is never worse than
    the best fitness printed during the run.

    Args:
        fitness: Callable mapping a weight vector to a score; higher is better.
        pop_size: Number of individuals per generation.
        crossover_rate: Fraction of each new generation produced by crossover.
        mutation_rate: Forwarded to ``mutate``.
        delta: Accepted for interface compatibility; not used by the current
            implementation.
        max_generations: Number of generations to run.
        bias_factor: Steepness of the exponential rank-selection bias.

    Returns:
        The best individual found, rescaled so its entries sum to the number
        of weights.
    """
    num_weights = 18
    population = initialize_population(pop_size, num_weights)
    # Start below any achievable score so non-positive fitness is tracked too.
    best_fitness = -np.inf
    best_individual = None

    # Selection probability decays exponentially with rank (0 = fittest).
    probabilities = np.exp(-bias_factor * np.arange(pop_size) / pop_size)
    probabilities /= np.sum(probabilities)
    num_crossovers = int(crossover_rate * pop_size / 2)

    for generations in range(max_generations):
        fitness_values = np.array([fitness(ind) for ind in population])
        # Sort fittest-first so indices match the rank-based probabilities.
        population = population[np.argsort(fitness_values)[::-1]]

        new_best_fitness = max(fitness_values)
        if new_best_fitness > best_fitness:
            best_fitness = new_best_fitness
            # Copy: offspring are mutated in place later on.
            best_individual = population[0].copy()
        print(f"{best_fitness:.5f}, {new_best_fitness:.5f}", generations)

        new_population = []
        for _ in range(num_crossovers):
            parent1_index, parent2_index = np.random.choice(pop_size, size=2, p=probabilities, replace=False)
            parent1, parent2 = population[parent1_index], population[parent2_index]
            new_population.extend(crossover(parent1, parent2))

        # Top up with rank-biased copies of existing individuals.
        while len(new_population) < pop_size:
            new_population.append(population[np.random.choice(pop_size, p=probabilities)].copy())

        population = np.array([mutate(ind, mutation_rate) for ind in new_population])

    # The last generation has not been scored yet; it may still hold the winner.
    fitness_values = np.array([fitness(ind) for ind in population])
    final_best = int(np.argmax(fitness_values))
    if best_individual is None or fitness_values[final_best] > best_fitness:
        best_individual = population[final_best]

    return best_individual / np.sum(best_individual) * num_weights  # Normalize weights

In [291]:
# Run the search with a small population; per-generation progress is printed.
best_ind = evolutionary_algorithm(
    fitness,
    pop_size=40,
    max_generations=500,
)

0.27498, 0.27498 0
0.27498, 0.27357 1
0.28808, 0.28808 2
0.28996, 0.28996 3
0.29214, 0.29214 4
0.30677, 0.30677 5
0.30677, 0.29683 6
0.30677, 0.29621 7
0.30677, 0.29917 8
0.31624, 0.31624 9
0.35166, 0.35166 10
0.35166, 0.35076 11
0.35166, 0.33385 12
0.35992, 0.35992 13
0.35992, 0.35634 14
0.36374, 0.36374 15
0.37425, 0.37425 16
0.40034, 0.40034 17
0.41721, 0.41721 18
0.41737, 0.41737 19
0.41737, 0.39900 20
0.41737, 0.39862 21
0.41737, 0.41168 22
0.43720, 0.43720 23
0.43720, 0.43052 24
0.43720, 0.43084 25
0.43720, 0.43270 26
0.44634, 0.44634 27
0.44794, 0.44794 28
0.45182, 0.45182 29
0.45182, 0.45015 30
0.46014, 0.46014 31
0.46528, 0.46528 32
0.51299, 0.51299 33
0.53280, 0.53280 34
0.53280, 0.53259 35
0.53280, 0.52149 36
0.54287, 0.54287 37
0.54296, 0.54296 38
0.54296, 0.54234 39
0.54296, 0.54238 40
0.54296, 0.51732 41
0.54296, 0.52984 42
0.54296, 0.52629 43
0.54296, 0.52811 44
0.54296, 0.51674 45
0.54296, 0.52778 46
0.54296, 0.54292 47
0.57772, 0.57772 48
0.57772, 0.57708 49
0.57919, 0

In [292]:
# Show the evolved table in fixed-point notation (no scientific notation),
# followed by its mean conductance and the corresponding fitness.
np.set_printoptions(precision=10, suppress=True)
print(best_ind)

print(mean_conductance(best_ind))
print(fitness(best_ind))

[ 0.0000000056  0.0000000043  0.0000000092  0.0000000023  0.0000000019
  0.0000000131  0.0000000922  0.0000000053  0.0000000258  0.0000000101
  0.0000000618  0.0000000149 17.9971960917  0.0000000015  0.0000000368
  0.000000023   0.0000000205  0.00280358  ]
0.11112043711659729
0.8888795628834028


In [293]:
from tabulate import tabulate

# Render the third-codon-position weights stored in best_ind as a symmetric
# base-by-base grid.

# Column headers
headers = ["", "U", "C", "A", "G"]  # Empty first header for row labels

alphabet = ["U", "C", "A", "G"]

data = []

for l1 in alphabet:
    # Row base ("from")
    po1 = alphabet.index(l1)
    row = []
    for l2 in alphabet:
        # Column base ("to")
        if l1 == l2:
            # Diagonal: a base is not substituted by itself
            row.append(0)
        else:
            po2 = alphabet.index(l2)
            # swap_index is keyed with the smaller base index first
            if po1 > po2:
                tmp_po1, tmp_po2 = po2, po1
            else:
                tmp_po1, tmp_po2 = po1, po2
            # Outer key 2 selects the third codon position
            row.append(best_ind[swap_index[2][tmp_po1][tmp_po2]])
            print(l1, l2, best_ind[swap_index[2][tmp_po1][tmp_po2]])
    data.append(row)

# Add row labels to data
data_with_labels = [[label] + list(row) for label, row in zip(alphabet, data)]

# Print table
print(tabulate(data_with_labels, headers=headers, tablefmt="grid"))

U C 17.997196091683392
U A 1.5400729992300077e-09
U G 3.684341591340294e-08
C U 17.997196091683392
C A 2.2994582963742806e-08
C G 2.052862027002445e-08
A U 1.5400729992300077e-09
A C 2.2994582963742806e-08
A G 0.0028035799661000217
G U 3.684341591340294e-08
G C 2.052862027002445e-08
G A 0.0028035799661000217
+----+--------------+--------------+-------------+-------------+
|    |            U |            C |           A |           G |
| U  |  0           | 17.9972      | 1.54007e-09 | 3.68434e-08 |
+----+--------------+--------------+-------------+-------------+
| C  | 17.9972      |  0           | 2.29946e-08 | 2.05286e-08 |
+----+--------------+--------------+-------------+-------------+
| A  |  1.54007e-09 |  2.29946e-08 | 0           | 0.00280358  |
+----+--------------+--------------+-------------+-------------+
| G  |  3.68434e-08 |  2.05286e-08 | 0.00280358  | 0           |
+----+--------------+--------------+-------------+-------------+


In [None]:
def weight(s1, s2):
    """Return a fixed weight for a single-base substitution between codons.

    Only the third position is distinguished: a U/C or A/G exchange there
    weighs 4, any other exchange among U, C, A, G there weighs 2, and every
    remaining case (including a change in the first or second position)
    weighs 1.

    Args:
        s1: First codon.
        s2: Second codon.

    Returns:
        4, 2 or 1 as described above, or ``False`` when the codons do not
        agree in exactly two positions.
    """
    matching = sum(x == y for x, y in zip(s1, s2))
    if matching != 2:
        return False

    third_pair = {s1[2], s2[2]}
    if third_pair in ({"U", "C"}, {"A", "G"}):
        return 4
    if third_pair in ({"U", "G"}, {"A", "C"}, {"A", "U"}, {"C", "G"}):
        return 2
    return 1

In [None]:
def weight_popt(s1, s2):
    """Return a position- and base-pair-specific substitution weight.

    Codon positions are examined in order; the first position whose pair of
    bases is a known exchange among U, C, A, G determines the result.

    Args:
        s1: First codon.
        s2: Second codon.

    Returns:
        The tabulated weight, ``False`` when the codons do not agree in
        exactly two positions, or ``None`` when they do but no position holds
        a tabulated base pair.
    """
    if sum(x == y for x, y in zip(s1, s2)) != 2:
        return False

    ug, ac, au = frozenset("UG"), frozenset("AC"), frozenset("AU")
    cg, uc, ag = frozenset("CG"), frozenset("UC"), frozenset("AG")
    # One lookup table per codon position, keyed by the unordered base pair.
    per_position = (
        {ug: 0.003, ac: 0.003, au: 0.002, cg: 0.003, uc: 0.006, ag: 0.005},
        {ug: 0.003, ac: 0.003, au: 0.002, cg: 0.004, uc: 0.002, ag: 0.005},
        {ug: 0.007, ac: 0.021, au: 0.018, cg: 0.012, uc: 15.925, ag: 1.977},
    )
    for position, pair_weights in enumerate(per_position):
        pair = frozenset((s1[position], s2[position]))
        if pair in pair_weights:
            return pair_weights[pair]
    return None