# BA2C Find a Profile-most Probable k-mer in a String

In [22]:
import numpy as np
from collections import defaultdict

def readProfile(fd, k):
    """Read a 4 x k profile matrix from an open text stream.

    Consumes the next four lines of ``fd``. Each line is split on
    whitespace and its tokens are converted to floats to fill one row.

    Args:
        fd: Object providing ``readline()`` that returns text lines.
        k: Number of columns in the profile.

    Returns:
        A NumPy float array of shape ``(4, k)``.
    """
    matrix = np.zeros((4, k))
    for row in matrix:
        # Iterating a 2-D array yields row views, so slice assignment
        # writes straight into ``matrix``.
        tokens = fd.readline().split()
        row[:] = [float(tok) for tok in tokens]
    return matrix

def BA2C(filename):
    """Find the profile-most probable k-mer(s) in a string.

    The input file is read in this order: the DNA string on the first
    line, the integer ``k`` on the second, then four rows of ``k``
    whitespace-separated probabilities parsed by ``readProfile``. Rows are
    indexed as A, C, G, T (0..3).

    Args:
        filename: Path of the input file.

    Returns:
        A list of every distinct k-mer of the string whose probability
        under the profile equals the maximum, in order of first occurrence.
        The list is empty when the string is shorter than ``k`` (there is
        no k-mer to score).

    Raises:
        KeyError: If a scored k-mer contains a character other than
            ``A``, ``C``, ``G`` or ``T``.
    """
    # Only the parsing needs the file handle; release it before computing.
    with open(filename, 'r') as f:
        seq = f.readline().rstrip()
        k = int(f.readline().rstrip())
        profile = readProfile(f, k)

    base2number = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    kmerProb = {}
    for start in range(len(seq) - k + 1):
        kmer = seq[start:start + k]
        # A k-mer's probability depends only on its letters, so repeated
        # occurrences need not be rescored.
        if kmer in kmerProb:
            continue
        kmerProb[kmer] = np.prod(
            [profile[base2number[base], pos] for pos, base in enumerate(kmer)]
        )

    # No window fits when len(seq) < k; max() on an empty dict would raise.
    if not kmerProb:
        return []

    maxProb = max(kmerProb.values())
    return [kmer for kmer, prob in kmerProb.items() if prob == maxProb]

## Test

In [23]:
BA2C('BA2C-test.txt')

['CCGAG']

In [25]:
BA2C('BA2C-test2.txt')

['TGTCGC', 'TGACGC']

## Quiz

In [26]:
BA2C('rosalind_ba2c.txt')

['GGTCGA']