In [9]:
def genSubstrings(seq,l):
    '''
    Collects every window of exactly l characters from seq, one reading frame at a time.

    Windows start at offsets 0, 3, 6, ... for the first frame, then 1, 4, 7, ...
    and finally 2, 5, 8, ..., so the result is grouped by frame rather than
    ordered by position in seq.

    :param seq: sequence to cut windows from (a DNA string in this script)
    :param l: int giving the length of each window, in characters
    :return: list of the windows whose length is exactly l
    '''
    windows = []

    for frame in (0, 1, 2):
        for start in range(frame, len(seq), 3):
            window = seq[start:start + l]
            # a slice that runs past the end of seq comes back short; leave it out
            if len(window) == l:
                windows.append(window)

    return windows

def dnaToAA(seq):
    '''
    Translates a DNA sequence into a peptide, one codon (three characters) at a time.

    Translation starts at index 0 and does not stop at stop codons; they are
    written to the output as '_'. Every chunk is looked up in the codon table,
    so a chunk that is not one of its keys (including a trailing chunk shorter
    than three characters) raises KeyError.

    :param seq: string representing a DNA sequence
    :return: string of one-letter amino acid codes, one per codon
    '''
    codon2aa = {
        'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
        'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
        'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
        'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
        'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
        'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
        'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
        'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
        'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
        'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
        'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
        'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
        'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
        'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
        'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
        'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W',
    }

    # cut seq into consecutive three-character chunks starting at index 0
    codons = (seq[pos:pos + 3] for pos in range(0, len(seq), 3))
    return ''.join(codon2aa[codon] for codon in codons)

def findEncodingSeqs(subStrings, aaString):
    '''
    Picks out the DNA substrings that encode a given peptide on either strand.

    A substring is kept when its own translation, or the translation of its
    reverse complement, equals aaString. The substring itself is what gets
    returned in both cases, never the reverse complement.

    :param subStrings: list of DNA substrings to test
    :param aaString: string representing the target peptide
    :return: list of the substrings that encode aaString, in their input order
    '''
    complement = str.maketrans("ATCG", "TAGC")  # base-pairing table, built once for the whole loop

    matches = []
    for candidate in subStrings:
        # complement every base, then reverse to read the opposite strand
        reverse_complement = candidate.translate(complement)[::-1]
        if dnaToAA(candidate) == aaString or dnaToAA(reverse_complement) == aaString:
            matches.append(candidate)

    return matches





def main(fName):
    '''
    Reads a DNA sequence and a peptide from a file and prints every encoding substring.

    The first line of the file is taken as the DNA sequence and the second line
    as the peptide; surrounding whitespace is stripped from both. Each substring
    returned by findEncodingSeqs is printed on its own line.

    :param fName: path of the input file
    :return: None
    '''
    with open(fName) as handle:
        rows = handle.readlines()

    dna = rows[0].strip()
    peptide = rows[1].strip()

    # each amino acid is encoded by three bases, so candidates are 3 * len(peptide) long
    candidates = genSubstrings(dna, 3 * len(peptide))

    for hit in findEncodingSeqs(candidates, peptide):
        print(hit)
# When run as a script (not imported), process the input file named '16input'.
if __name__ == '__main__':
    main(fName='16input')

AAGGAAGTATTTGAGCCTCATTATTAC
ATAGTAGTGTGGTTCAAATACCTCCTT
AAAGAGGTGTTTGAACCTCATTACTAT
GTAATAGTGCGGTTCAAAAACTTCCTT
AAGGAGGTATTTGAACCCCACTATTAC
ATAGTAGTGAGGTTCGAAGACTTCCTT
GTAGTAATGGGGCTCAAACACCTCTTT
ATAATAATGTGGCTCGAACACTTCTTT
AAAGAAGTTTTCGAACCACATTATTAC
ATAGTAATGAGGTTCGAAAACCTCTTT
GTAGTAGTGCGGCTCAAAAACTTCCTT
ATAGTAATGGGGTTCGAAGACTTCCTT
AAGGAAGTGTTTGAACCTCACTATTAT
ATAATAGTGAGGCTCAAAAACTTCCTT
AAAGAAGTTTTCGAGCCGCACTACTAC
GTAGTAATGGGGTTCGAAGACTTCCTT
GTAGTAATGAGGTTCAAAAACCTCCTT
GTAGTAATGGGGCTCGAAAACCTCCTT
ATAATAATGCGGCTCGAATACTTCCTT
AAGGAAGTATTCGAACCACATTACTAT


In [10]:
# Call main directly on the input file named '16input'.
main('16input')

AAGGAAGTATTTGAGCCTCATTATTAC
ATAGTAGTGTGGTTCAAATACCTCCTT
AAAGAGGTGTTTGAACCTCATTACTAT
GTAATAGTGCGGTTCAAAAACTTCCTT
AAGGAGGTATTTGAACCCCACTATTAC
ATAGTAGTGAGGTTCGAAGACTTCCTT
GTAGTAATGGGGCTCAAACACCTCTTT
ATAATAATGTGGCTCGAACACTTCTTT
AAAGAAGTTTTCGAACCACATTATTAC
ATAGTAATGAGGTTCGAAAACCTCTTT
GTAGTAGTGCGGCTCAAAAACTTCCTT
ATAGTAATGGGGTTCGAAGACTTCCTT
AAGGAAGTGTTTGAACCTCACTATTAT
ATAATAGTGAGGCTCAAAAACTTCCTT
AAAGAAGTTTTCGAGCCGCACTACTAC
GTAGTAATGGGGTTCGAAGACTTCCTT
GTAGTAATGAGGTTCAAAAACCTCCTT
GTAGTAATGGGGCTCGAAAACCTCCTT
ATAATAATGCGGCTCGAATACTTCCTT
AAGGAAGTATTCGAACCACATTACTAT


These functions take a DNA sequence and a peptide, both as strings, and return every codon-length-aligned substring
of the sequence that encodes the given peptide, either directly or through its reverse complement.