In [7]:
from Bio import SeqIO
from collections import Counter
from collections import defaultdict
import os

# Directory that holds the input FASTA file and receives the generated CSV.
# Set the CODONS_DATASET_PATH environment variable to run the notebook on
# another machine; when it is unset the original location is used.
dataset_path = os.environ.get("CODONS_DATASET_PATH", "/Users/udz/Desktop/codons_analysis")
# Name of the FASTA file to analyse, relative to dataset_path.
file_name = "cereus.fna"
file_path = os.path.join(dataset_path, file_name)

In [8]:
def generate_codons(deep=2):
    """Build every sequence of ``deep + 1`` bases, leaving out the stop codons.

    Args:
        deep: Number of bases appended after the first one; the default of 2
            yields three-letter codons.

    Returns:
        list of str ordered by the base order A, T, G, C (first base varying
        slowest). Any sequence equal to 'TAA', 'TAG' or 'TGA' is omitted; the
        filter is applied at every recursion level.

    Raises:
        ValueError: if ``deep`` is negative (the recursion would never reach
            its base case).
    """
    if deep < 0:
        raise ValueError("deep must be non-negative")

    bases = ['A', 'T', 'G', 'C']
    codons_to_skip = ['TAA', 'TAG', 'TGA']
    if deep == 0:
        return bases

    res = []
    for current_base in bases:
        for suffix in generate_codons(deep - 1):
            codon = current_base + suffix
            if codon in codons_to_skip:
                # `continue` is what skips an iteration in Python; a bare
                # `next` only names the builtin and does nothing.
                continue
            res.append(codon)
    return res

def generate_table():
    """Create a square codon-by-codon table with every cell preset to 0.

    Returns:
        A nested defaultdict keyed first by one codon and then by another,
        covering every ordered pair of codons from ``generate_codons()``.
    """
    all_codons = generate_codons()
    zero_table = defaultdict(lambda: defaultdict(int))

    for row_codon in all_codons:
        # Fill the whole row at once; each column starts at zero.
        zero_table[row_codon].update(dict.fromkeys(all_codons, 0))
    return zero_table

In [9]:
def codons_counts(seq):
    """Count the consecutive, non-overlapping three-base chunks of ``seq``.

    Chunks are converted to ``str`` before counting. A trailing chunk shorter
    than three bases is counted like any other chunk.

    Returns:
        collections.Counter mapping chunk text to its number of occurrences.
    """
    tally = Counter()
    for start in range(0, len(seq), 3):
        tally[str(seq[start:start + 3])] += 1
    return tally

def codons_coorurrencies(seq):
    """Count how often two codons sit next to each other in ``seq``.

    The sequence is cut into consecutive three-base chunks (a shorter trailing
    chunk is kept, as in ``codons_counts``). Every adjacent pair contributes
    0.5 when seen from the left chunk and 0.5 when seen from the right chunk,
    in both directions, so each neighbouring pair (a, b) ends up adding 1.0 to
    ``res[a][b]`` and 1.0 to ``res[b][a]``.

    Args:
        seq: Sliceable sequence of bases (a ``str`` or an object whose slices
            convert to text with ``str()``).

    Returns:
        Nested defaultdict ``res[codon][neighbour] -> float`` keyed by plain
        ``str`` chunks.
    """
    res = defaultdict(lambda: defaultdict(int))
    # Normalise chunks to str so the keys match the ones codons_counts produces.
    codons = [str(seq[i:i + 3]) for i in range(0, len(seq), 3)]
    last_index = len(codons) - 1

    for index, current_codon in enumerate(codons):
        if index > 0:
            prev_codon = codons[index - 1]
            res[prev_codon][current_codon] += 0.5
            res[current_codon][prev_codon] += 0.5

        # Every chunk except the last has a right neighbour, including a
        # trailing chunk of only one base.
        if index < last_index:
            next_codon = codons[index + 1]
            res[next_codon][current_codon] += 0.5
            res[current_codon][next_codon] += 0.5
    return res

def count_cooxisting(counts):
    """Mark every ordered pair of keys found in ``counts`` with a 1.

    Args:
        counts: Mapping whose keys are the codons present in one sequence;
            the values are not read.

    Returns:
        Nested defaultdict where ``res[a][b] == 1`` for all keys a, b.
    """
    present = list(counts)
    res = defaultdict(lambda: defaultdict(int))

    for first in present:
        res[first].update(dict.fromkeys(present, 1))

    return res

def codons_percents(counts):
    """Compute, for every ordered pair of keys, the product of their shares.

    Each cell is ``counts[a] * counts[b]`` divided by the square of the sum of
    all counts.

    Args:
        counts: Mapping from codon to its number of occurrences.

    Returns:
        Nested defaultdict ``res[a][b] -> float``.
    """
    total = sum(counts.values())
    denominator = total ** 2
    res = defaultdict(lambda: defaultdict(int))
    for first, first_count in counts.items():
        for second, second_count in counts.items():
            res[first][second] = (first_count * second_count) / denominator
    return res

def add_table(base_table, table_to_add):
    """Return a new table: ``base_table`` plus the matching cells of ``table_to_add``.

    Only cells that exist in ``base_table`` appear in the result; cells that
    exist solely in ``table_to_add`` are ignored. Neither input is modified.

    Returns:
        Nested defaultdict with the summed values.
    """
    merged = defaultdict(lambda: defaultdict(int))

    for row_key, base_row in base_table.items():
        # Look the extra row up once; membership tests avoid creating keys.
        extra_row = table_to_add[row_key] if row_key in table_to_add else {}
        for col_key, base_value in base_row.items():
            total = base_value
            if col_key in extra_row:
                total += extra_row[col_key]
            merged[row_key][col_key] += total
    return merged

def sum_tables(default_table, table1, table2):
    """Add ``table1`` and then ``table2`` onto ``default_table`` via ``add_table``.

    The result contains only the cells that exist in ``default_table``.
    """
    return add_table(add_table(default_table, table1), table2)

def divide_each_value_in_table(table, divider):
    """Divide every cell of a nested table by ``divider``, in place.

    Returns:
        The same ``table`` object that was passed in, after modification.
    """
    for row in table.values():
        for key in row:
            row[key] /= divider
    return table

In [10]:
# Placeholder; rebound to each record's sequence inside the loop below.
seq = ''
# Zero-filled codon-by-codon table; its keys decide which cells sum_tables keeps.
default_table = generate_table()
# Number of FASTA records processed.
# NOTE(review): not read by any cell visible here.
seqs_count = 0
# Running total over all records; empty at first, so the first sum_tables call
# contributes only that record's table.
cooexisting_table = {}

# For every FASTA record, add 1 to each pair of codons that both occur in it.
for seq_record in SeqIO.parse(file_path, "fasta"):
    seq = seq_record.seq
    seqs_count += 1
    counts = codons_counts(seq)
        
    # NOTE(review): `coocurrencies` is computed but not used by any cell visible here.
    coocurrencies = codons_coorurrencies(seq)
    coexistings = count_cooxisting(counts)

    cooexisting_table = sum_tables(default_table, cooexisting_table, coexistings)

In [11]:
import csv

def write_in_file(file_path, res_table):
    """Write a nested codon table to a '|'-delimited CSV file.

    The header row is ``codon`` followed by the outer keys of ``res_table``.
    Each following row holds one outer key in the ``codon`` column and that
    row's values, converted to text with '.' replaced by ','.

    ``res_table`` is left untouched: rows are formatted into fresh dicts
    rather than rewritten in place, so the caller's numeric values survive and
    the function can be called again on the same table.

    Args:
        file_path: Destination path; an existing file is overwritten.
        res_table: Mapping ``codon -> {codon -> value}``.

    Raises:
        ValueError: raised by ``csv.DictWriter`` when a row contains a key
            that is not one of the outer keys of ``res_table``.
    """
    fieldnames = ['codon'] + list(res_table.keys())
    with open(file_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='|')

        writer.writeheader()
        for codon, values in res_table.items():
            # Format into a new dict so the caller's table is not mutated.
            row = {key: str(value).replace('.', ',') for key, value in values.items()}
            row['codon'] = codon
            writer.writerow(row)

In [12]:
# Save the accumulated table next to the input as "<file_name>.coexisting.csv".
write_in_file(os.path.join(dataset_path, file_name + '.coexisting' + '.csv'), cooexisting_table)