# BA4G
Implement LeaderboardCyclopeptideSequencing

In [1]:
# Load the residue -> integer mass lookup. Every line of the file is expected
# to hold exactly two whitespace-separated fields: the key and an integer mass.
with open('integer_mass_table.txt', 'r') as f:
    massTable = {
        symbol: int(mass)
        for symbol, mass in (row.split() for row in f)
    }

        
def CyclicSpectrum(peptide, massTable):
    """Return the sorted theoretical spectrum of a cyclic peptide.

    Args:
        peptide: Sequence of residues; each residue must be a key of
            ``massTable``.
        massTable: Mapping from residue to its mass.

    Returns:
        A sorted list containing 0, the mass of every contiguous fragment of
        the linear form, and the mass of every fragment that wraps around
        the ends of the cycle.

    Raises:
        KeyError: If a residue is missing from ``massTable``.
    """
    size = len(peptide)

    # cumulative[k] is the total mass of the first k residues.
    cumulative = [0]
    for residue in peptide:
        cumulative.append(cumulative[-1] + massTable[residue])
    whole = cumulative[-1]

    masses = [0]
    for start in range(size):
        for stop in range(start + 1, size + 1):
            fragment = cumulative[stop] - cumulative[start]
            masses.append(fragment)
            # A fragment touching neither end of the linear form has a
            # complement that wraps around the cycle; its mass is the rest
            # of the peptide.
            if start > 0 and stop < size:
                masses.append(whole - fragment)
    return sorted(masses)


def LinearSpectrum(peptide, massTable):
    """Return the sorted theoretical spectrum of a linear peptide.

    Args:
        peptide: Sequence of residues; each residue must be a key of
            ``massTable``.
        massTable: Mapping from residue to its mass.

    Returns:
        A sorted list containing 0 and the mass of every contiguous
        fragment of ``peptide``.

    Raises:
        KeyError: If a residue is missing from ``massTable``.
    """
    # cumulative[k] is the total mass of the first k residues.
    running = 0
    cumulative = [running]
    for residue in peptide:
        running += massTable[residue]
        cumulative.append(running)

    # Every fragment mass is the difference of two prefix masses.
    count = len(cumulative)
    masses = [
        cumulative[hi] - cumulative[lo]
        for lo in range(count - 1)
        for hi in range(lo + 1, count)
    ]
    masses.append(0)
    masses.sort()
    return masses

In [2]:
import copy
from collections import Counter


def pepScoreCirc(peptide, spectrum, massTable):
    """Score a peptide against a spectrum using its cyclic spectrum.

    The score is the number of masses that the peptide's theoretical cyclic
    spectrum shares with ``spectrum``, counted with multiplicity: a mass
    occurring ``a`` times in one and ``b`` times in the other contributes
    ``min(a, b)``.

    Args:
        peptide: Sequence of residues, each a key of ``massTable``.
        spectrum: Iterable of masses to compare against; it is not modified.
        massTable: Mapping from residue to its mass.

    Returns:
        The number of shared masses as an int.
    """
    theoretical = Counter(CyclicSpectrum(peptide, massTable))
    # Multiset intersection keeps min(count) per mass, which is exactly what
    # matching and removing one occurrence at a time computes, without the
    # repeated linear scans of a list.
    shared = theoretical & Counter(spectrum)
    return sum(shared.values())


def pepScoreLinear(peptide, spectrum, massTable):
    """Score a peptide against a spectrum using its linear spectrum.

    The score is the number of masses that the peptide's theoretical linear
    spectrum shares with ``spectrum``, counted with multiplicity: a mass
    occurring ``a`` times in one and ``b`` times in the other contributes
    ``min(a, b)``.

    Args:
        peptide: Sequence of residues, each a key of ``massTable``.
        spectrum: Iterable of masses to compare against; it is not modified.
        massTable: Mapping from residue to its mass.

    Returns:
        The number of shared masses as an int.
    """
    theoretical = Counter(LinearSpectrum(peptide, massTable))
    # Multiset intersection keeps min(count) per mass, which is exactly what
    # matching and removing one occurrence at a time computes, without the
    # repeated linear scans of a list.
    shared = theoretical & Counter(spectrum)
    return sum(shared.values())

In [3]:
def trim(peptides, spectrum, N, massTable):
    """Keep the top ``N`` peptides by linear score, including ties.

    Args:
        peptides: Iterable of candidate peptides.
        spectrum: Masses passed through to ``pepScoreLinear``.
        N: Number of leaderboard places to keep.
        massTable: Mapping from residue to its mass.

    Returns:
        ``peptides`` itself when there are at most ``N`` distinct
        candidates; otherwise a list of every candidate whose score is at
        least the ``N``-th best score, so ties at the cut-off are retained.
    """
    scoreOf = {
        candidate: pepScoreLinear(candidate, spectrum, massTable)
        for candidate in peptides
    }
    if len(scoreOf) <= N:
        return peptides

    # The score sitting in the N-th place is the bar every survivor must reach.
    ranked = sorted(scoreOf.values(), reverse=True)
    cutoff = ranked[N - 1]
    return [
        candidate
        for candidate, points in scoreOf.items()
        if points >= cutoff
    ]


def LeaderboardCyclopeptideSequencing(spectrum, N, massTable):
    """Find a high-scoring cyclic peptide for a spectrum by leaderboard search.

    Candidates are grown one residue at a time. After each round, candidates
    heavier than the parent mass are discarded, candidates whose mass equals
    the parent mass compete for best cyclic score, and the remaining
    candidates are cut down with ``trim``.

    Args:
        spectrum: Collection of integer masses. It does not need to be
            sorted; the parent mass is taken as its largest value.
        N: Leaderboard size passed to ``trim``.
        massTable: Mapping from residue symbol to its mass.

    Returns:
        The best-scoring peptide as a string of ``massTable`` keys, or ``''``
        when ``spectrum`` is empty or no candidate with the parent mass
        scores above zero.
    """
    if not spectrum:
        # Without any mass there is no parent mass to aim for.
        return ''

    # Use the largest mass rather than the last element so that unsorted
    # input is handled correctly.
    parentMass = max(spectrum)
    allaa = list(massTable)

    # Start from the empty peptide so that single-residue peptides are
    # scored and trimmed like every other length.
    candidatePep = {''}
    finalPep = ''
    maxScore = 0
    while candidatePep:
        expanded = {pep + nxt for pep in candidatePep for nxt in allaa}
        survivors = set()
        # Sorted iteration makes the choice between equally scoring peptides
        # reproducible instead of dependent on set ordering.
        for pep in sorted(expanded):
            pepMass = sum(massTable[k] for k in pep)
            if pepMass > parentMass:
                # Too heavy already; extending it can never match.
                continue
            if pepMass == parentMass:
                scoreCirc = pepScoreCirc(pep, spectrum, massTable)
                if scoreCirc > maxScore:
                    finalPep = pep
                    maxScore = scoreCirc
            survivors.add(pep)
        candidatePep = trim(survivors, spectrum, N, massTable)
    return finalPep

In [5]:
# Input file layout: the first line is the leaderboard size N, the second
# line is the spectrum as whitespace-separated integers.
with open('rosalind_ba4g.txt', 'r') as f:
    N = int(f.readline().rstrip())
    spectrum = [int(k) for k in f.readline().rstrip().split()]

# The file is only needed for reading; run the search once it is closed.
pep = LeaderboardCyclopeptideSequencing(spectrum, N, massTable)
# Report the peptide as its residue masses joined by dashes.
print('-'.join(str(massTable[k]) for k in pep))

113-115-128-113-131-99-186-156-156-147-186-129-128
