### Problem 16
    Find substrings of a genome encoding a given amino acid sequence.

    Given: A DNA string Text and an amino acid string Peptide.

    Return: All substrings of Text encoding Peptide (if any such substrings exist).

In [93]:
class PeptideEncoding:
    """
    A class to find substrings of a genome encoding a given amino acid sequence.

    A substring counts as a match when either the substring itself or its
    reverse complement translates, codon by codon, to the peptide.

    Attributes
    ----------
    strand : str
        dna strand
    peptide : str
        amino acid sequence
    table : dict
        keys are codons and values are amino acid

    Methods
    -------
    reversedStrand(strand):
        return the reverse complement strand
    findSubstrings():
        find the substrings from both strands
    """

    # DNA codon -> one-letter amino acid; '_' is used for the stop codons.
    _CODON_TABLE = {
        'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
        'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
        'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
        'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
        'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
        'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
        'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
        'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
        'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
        'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
        'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
        'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
        'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
        'TAC': 'Y', 'TAT': 'Y', 'TAA': '_', 'TAG': '_',
        'TGC': 'C', 'TGT': 'C', 'TGA': '_', 'TGG': 'W',
    }

    # Base -> complementary base.
    _COMPLEMENT = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def __init__(self, infile):
        """
        Constructor: read the strand and the peptide from a file.

        Parameters
        ----------
        infile : str
            Name of a text file whose first line is the DNA strand and
            whose second line is the amino acid sequence. Trailing
            whitespace on both lines is stripped.
        """
        with open(infile) as file:
            self.strand = file.readline().rstrip()   # first line: strand
            self.peptide = file.readline().rstrip()  # second line: peptide
        # Per-instance copy so that editing one instance's table cannot
        # affect the others.
        self.table = dict(self._CODON_TABLE)

    def reversedStrand(self, strand):
        """
        Generate the reverse complement strand of the given strand.

        Parameters
        ----------
        strand : str
            The DNA strand.

        Returns
        -------
        str
            The complement of ``strand`` read in reverse order.

        Raises
        ------
        KeyError
            If ``strand`` contains a character other than A, C, G or T.
        """
        complement = self._COMPLEMENT
        return ''.join(complement[base] for base in reversed(strand))

    def _encodingSubstrings(self, strand):
        """Return the windows of ``strand`` that translate to ``self.peptide``.

        The three reading frames are scanned one after the other (offset 0,
        then 1, then 2), each from left to right, and matches are returned
        in that order. Raises KeyError if a window contains a codon that is
        not a key of ``self.table``.
        """
        peptide = self.peptide
        table = self.table
        window = 3 * len(peptide)  # number of bases needed to encode the peptide
        matches = []
        for frame in range(3):
            for start in range(frame, len(strand) - window + 1, 3):
                candidate = strand[start:start + window]
                translated = ''.join(
                    table[candidate[pos:pos + 3]] for pos in range(0, window, 3)
                )
                if translated == peptide:
                    matches.append(candidate)
        return matches

    def findSubstrings(self):
        """
        Find all substrings of the strand that encode the peptide.

        Returns
        -------
        list of str
            First the matches found on the given strand, then the matches
            found on its reverse complement. A reverse-complement match is
            converted back with ``reversedStrand`` so that every returned
            string is a substring of ``self.strand``.

        Raises
        ------
        KeyError
            If the strand contains a base or codon missing from the lookup
            tables.
        """
        forward = self._encodingSubstrings(self.strand)
        # Build the reverse complement once instead of once per window.
        complementStrand = self.reversedStrand(self.strand)
        backward = [
            self.reversedStrand(match)
            for match in self._encodingSubstrings(complementStrand)
        ]
        return forward + backward
    

### Main

In [103]:
def main(infile):
    '''
    Print every substring of the strand that encodes the peptide.

    Parameters
    ----------
    infile : str
        name of the input file handed to PeptideEncoding

    Returns
    -------
    None
        each substring is written to STDOUT on its own line
    '''
    finder = PeptideEncoding(infile)  # reads the strand and the peptide
    for match in finder.findSubstrings():
        print(match)

### Run the program here

In [105]:
# Script entry point: run main on 'rosalind_ba4b.txt' only when this module
# is executed directly, not when it is imported.
if __name__ == "__main__":
    main('rosalind_ba4b.txt')

TGTTTCAGGATGGCCGGGTCGTAT
TGCTTCCGGATGGCGGGTTCGTAC
TGTTTTCGTATGGCAGGATCCTAC
TGTTTCAGAATGGCAGGGTCATAT
TGTTTCCGTATGGCAGGCTCATAT
TGTTTCCGTATGGCAGGGAGCTAT
TGTTTCAGGATGGCGGGGTCGTAC
TGCTTTCGCATGGCCGGTAGCTAT
TGCTTTAGGATGGCAGGTAGTTAT
TGCTTCAGAATGGCAGGGTCGTAC
GTAAGAGCCAGCCATCCGGAAGCA
ATAGGATCCGGCCATCCTAAAACA
GTATGACCCCGCCATCCGGAAACA
GTACGAGCCTGCCATCCGAAAGCA
ATAAGATCCTGCCATCCGGAAACA
ATAGCTACCCGCCATCCGGAAACA
ATAAGAACCCGCCATCCGGAAGCA
ATAAGAGCCAGCCATCCGAAAGCA
GTATGAGCCTGCCATTCGGAAACA
ATAAGACCCAGCCATCCGGAAGCA


### Inspection