In [51]:
class FindPeptide:
    '''
    Branch and bound search for every cyclic peptide whose theoretical spectrum
    equals a spectrum produced by a mass spectrometer.

    Spectra are lists of ints where each int is a mass in the spectrum.
    Peptides are lists of ints where each int is the integer mass of an amino acid.
    Candidate peptides built by branchAndBound start with a 0 placeholder (the
    "empty peptide" seed); the spectrum generators ignore non-positive entries.
    '''

    # Integer masses of the amino acids used to extend candidates.
    AA_MASSES = (57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186)

    def __init__(self, masses):
        '''
        :param masses: list of ints, the experimental spectrum (any order)
        '''
        self.spectrum = masses

    def genLinearSpectrum(self, peptide):
        '''
        Generates the linear spectrum of a peptide: 0 plus the mass of every
        contiguous sub-peptide, including prefixes, suffixes and the whole peptide.

        :param peptide: list of ints representing a peptide
        :return: sorted list of ints, the theoretical linear spectrum
        '''
        peptide = [x for x in peptide if x > 0]  # drop the 0 placeholder left over from the initial branch
        if not peptide:
            return [0]  # the empty peptide only produces the zero mass

        # prefix[k] is the mass of the first k residues, so the mass of
        # peptide[i:j] is prefix[j] - prefix[i].
        prefix = [0]
        for mass in peptide:
            prefix.append(prefix[-1] + mass)

        size = len(peptide)
        spectrum = [0]
        for i in range(size):
            # j runs up to size inclusive so sub-peptides ending at the last
            # residue (the suffixes) are not skipped.
            for j in range(i + 1, size + 1):
                spectrum.append(prefix[j] - prefix[i])

        spectrum.sort()
        return spectrum

    def genCycloSpectrum(self, peptide):
        '''
        Generates the theoretical spectrum of masses of a given cyclic peptide.

        :param peptide: list of ints representing a peptide
        :return: sorted list of ints, the theoretical cyclic spectrum
        '''
        peptide = [x for x in peptide if x > 0]  # drop the 0 placeholder left over from the initial branch
        if not peptide:
            return [0]  # the empty peptide only produces the zero mass

        size = len(peptide)
        # Appending the peptide to itself lets sub-peptides that wrap around
        # the ring be read as ordinary slices.
        prefix = [0]
        for mass in peptide + peptide:
            prefix.append(prefix[-1] + mass)

        spectrum = [0, prefix[size]]  # zero mass and the full ring
        for start in range(size):
            for length in range(1, size):  # every proper sub-peptide length
                spectrum.append(prefix[start + length] - prefix[start])

        spectrum.sort()
        return spectrum

    def branch(self, candidates):
        '''
        Implements the branch step of the branch and bound method.
        Creates new candidate peptides by appending every possible amino acid
        mass to each existing candidate peptide.

        :param candidates: list of peptides (lists of ints)
        :return: list of extended peptides, one per (candidate, amino acid) pair
        '''
        return [peptide + [aa] for peptide in candidates for aa in self.AA_MASSES]

    def branchAndBound(self):
        '''
        Implements the branch and bound algorithm to find every peptide that is
        consistent with the given spectrum.

        BranchAndBound:

        Start with a list of candidate peptides holding just [0], the empty peptide.

        while candidates is not empty:
            branch(candidates): extend every candidate by one amino acid
            bound: drop every candidate whose linear spectrum contains a mass
                   more often than the given spectrum does
            any surviving candidate whose cyclic spectrum is identical to the
            given spectrum is recorded as a solution and no longer extended

        :return: All possible origin peptides as a 2d list of ints; each peptide
                 keeps the leading 0 placeholder of the seed candidate
        '''
        from collections import Counter  # local import: no top-of-file import block is visible here

        # Theoretical spectra are sorted, so compare against a sorted copy;
        # the caller's list is left untouched.
        target = sorted(self.spectrum)
        targetCounts = Counter(target)

        finalPeptides = []  # will hold the solutions
        candidates = [[0]]

        while candidates:  # main loop of the algorithm
            survivors = []
            for peptide in self.branch(candidates):  # branch step
                linearCounts = Counter(self.genLinearSpectrum(peptide))

                # Bound step: a mass absent from the target has a target count
                # of 0, so one multiplicity check covers both "unknown mass"
                # and "mass used too many times".
                if any(count > targetCounts[mass] for mass, count in linearCounts.items()):
                    continue

                # Equal sorted spectra imply the peptide mass equals the largest
                # target mass, so no separate total-mass check is needed.
                if self.genCycloSpectrum(peptide) == target:
                    finalPeptides.append(peptide)
                else:
                    survivors.append(peptide)

            candidates = survivors

        return finalPeptides


def main(fName):
    '''
    Handles input and output for branchAndBound.
    Reads the spectrum from the first line of the given file (whitespace
    separated ints), runs FindPeptide.branchAndBound on it and prints each
    resulting peptide on its own line as masses joined by '-'.

    An empty file is treated as an empty spectrum and prints nothing.

    :param fName: path of the file whose first line holds the spectrum
    :return: None
    '''
    with open(fName) as inFile:
        # Only the first line is used; readline() returns '' for an empty
        # file, which yields an empty spectrum instead of an IndexError.
        masses = [int(token) for token in inFile.readline().split()]

    finder = FindPeptide(masses)

    for pep in finder.branchAndBound():
        # pep[0] is the 0 placeholder of the seed candidate, not a residue.
        print('-'.join(str(mass) for mass in pep[1:]))

# Script entry point: calls main() when this code is executed directly.
# NOTE(review): the file name passed here is the empty string, so open() in
# main() raises FileNotFoundError (see the recorded error output below) —
# replace it with the path of a real spectrum file.
if __name__ == '__main__':
    main(fName='')


FileNotFoundError: [Errno 2] No such file or directory: ''

In [None]:
# Run the solver on the spectrum stored in the file '18input' and print the matching peptides.
main('18input')



Implements the branch and bound algorithm to find a theoretical peptide that is consistent with the given spectrum.
        Spectra are defined as lists of ints where each int is a mass in the spectra
        Peptides are defined as lists of ints where each int is the integer mass representation of an amino acid

        BranchAndBound:

        Start with a list of candidate peptides with just [0], the empty peptide
        Candidates will hold all candidate peptides that could match our spectrum

        while candidates is not empty:
            branch(candidates), The purpose of this step is to add all possible peptides of length k+1 to the list
                                where k is the length of the peptides in candidates
            bound(candidates), This step will remove any peptides in candidates that are not consistent with our spectrum

            if there is a peptide that has a mass equal to our original peptide and it generates an identical spectrum
            to our given spectrum:
                add it to the list of peptides that match

        finally we will have a list of all possible peptides