In [16]:
import itertools
import pathlib
from more_itertools import batched

In [17]:
# Codon table: maps each three-letter DNA codon to a one-letter amino-acid
# code. The value "0" marks the stop codons (TAA, TAG, TGA); solve() cuts
# the translated protein at the first "0".
# Layout: each row varies the first base (T, C, A, G); the trailing comment
# gives the shared second and third bases of that row.
DNA_TO_AA = {
    "TTT": "F", "CTT": "L", "ATT": "I", "GTT": "V",  # xTT
    "TTC": "F", "CTC": "L", "ATC": "I", "GTC": "V",  # xTC
    "TTA": "L", "CTA": "L", "ATA": "I", "GTA": "V",  # xTA
    "TTG": "L", "CTG": "L", "ATG": "M", "GTG": "V",  # xTG
    "TCT": "S", "CCT": "P", "ACT": "T", "GCT": "A",  # xCT
    "TCC": "S", "CCC": "P", "ACC": "T", "GCC": "A",  # xCC
    "TCA": "S", "CCA": "P", "ACA": "T", "GCA": "A",  # xCA
    "TCG": "S", "CCG": "P", "ACG": "T", "GCG": "A",  # xCG
    "TAT": "Y", "CAT": "H", "AAT": "N", "GAT": "D",  # xAT
    "TAC": "Y", "CAC": "H", "AAC": "N", "GAC": "D",  # xAC
    "TAA": "0", "CAA": "Q", "AAA": "K", "GAA": "E",  # xAA
    "TAG": "0", "CAG": "Q", "AAG": "K", "GAG": "E",  # xAG
    "TGT": "C", "CGT": "R", "AGT": "S", "GGT": "G",  # xGT
    "TGC": "C", "CGC": "R", "AGC": "S", "GGC": "G",  # xGC
    "TGA": "0", "CGA": "R", "AGA": "R", "GGA": "G",  # xGA
    "TGG": "W", "CGG": "R", "AGG": "R", "GGG": "G",  # xGG
}


def solve(rna, codon_table=None):
    """Translate an RNA string into a protein string.

    The input is read codon by codon (three bases at a time) from its start,
    and translation ends at the first stop codon or at the end of the input,
    whichever comes first. A trailing incomplete codon (one or two leftover
    bases) is ignored.

    Args:
        rna: RNA sequence as a string. ``U`` is treated as ``T`` for the
            table lookup. Any whitespace (e.g. the trailing newline of a
            file read, or line wrapping) is removed before translation.
        codon_table: Optional mapping from three-letter DNA codons to
            one-letter amino-acid codes, with stop codons mapped to ``"0"``.
            Defaults to the module-level ``DNA_TO_AA`` table.

    Returns:
        The amino-acid string up to, but not including, the first stop codon.

    Raises:
        KeyError: If a complete codon located before the first stop codon is
            not present in the codon table.
    """
    if codon_table is None:
        codon_table = DNA_TO_AA

    # Drop all whitespace so a trailing newline or a line-wrapped sequence
    # cannot shift the reading frame, then switch to the DNA alphabet that
    # the codon table is keyed on.
    dna = "".join(rna.split()).replace("U", "T")

    residues = []
    # `len(dna) - 2` is the last index at which a full codon can start, so a
    # trailing partial codon is never looked up.
    for start in range(0, len(dna) - 2, 3):
        residue = codon_table[dna[start:start + 3]]
        if residue == "0":
            # Stop codon: anything after it is not part of the protein and
            # is deliberately never looked up.
            break
        residues.append(residue)
    return "".join(residues)


# Sanity check: a short RNA string with a known translation. The input ends
# in the stop codon UGA, which must not appear in the result.
assert (
    solve("AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA")
    == "MAMAPRTEINSTRING"
)

In [18]:
# Load the RNA string from the dataset file and translate it. The bare call
# on the last line is what the notebook displays as the cell output.
rna_path = pathlib.Path("data/rosalind_prot.txt")
rna = rna_path.read_text()
solve(rna)

'MAVDYGQAKPRTNGPRLMTDPPDPGESCVSVFRMGNWRRALLTPSKNLDSDFDFAESRNPYLAIRDYHGHADQAMPARRALANTDRYMIVEQILPFLPFIHPSLKHLFTRSLFKTQVLLNKVPRVNGGSVHSVKGETAVLKLRKDLTCDTVRQRSDRLSNGDKQLYIVAVLNCIGHNSSVSYLQTPGFNMCGRGRQSLTNELLAFNSIGERFPDPSYVEFLRCQPLKVVNWSHGLRWHSRRQLSAEGWAFISSFCGAVQHTLSWSSWTAEPCGQARGRPHVRFFSTIATSCRRNDINAPTFKLGVLATASCSEALSGRSRANGLISGLLGEPEVRTTFGQHRQYIAKTLQVWPCQHPQFTSILSSLLTVCLAGYGGRVPRGTTHGRPWDGKTCGRIARLSYQVWHSKHLLIISKGGTHRIVCVGVKKRPCEQLSFSNLIGRAVKPSEANRQGRTTFTRTTGLRKGKDPRNSLAHRSIVARAKPLMDLPKSDLVCEGAQPPKWPNAVELRKFLDDNAGSYDPVADGGREDLFETAPLSANHVVPGYYPVLQLAIYVAHFLCDFVKSGSSSAATSVQATRMTLPTCSLFERNYDDVDLYKEETHLRRVPGNRKLLYVDIKDIFPRVPVANPDRAFHCACGLLERFDPIRVTSYQFVIGEGRPLTEAQLWHHQARFVAFRQGDHMMKNEGLLSVTKHRLGIVNCTKSPCLTRVKWGVHTCIVICEVPSWGSLFLTANQCRDQYYWCSRTTAGAAVTARIANVIITECLIMAGASEESLNVQRHLVGTSLTLRELTAIFISTGTASEQSGVRQLGASSTSPLSFSQKTSNIYCRDSGLRRAPLRAETTADCINDPGEPSPSSAICSSLGLYSAPLSHNVPNMAAIMYSSQYWLRGNSSSASNLVVSQPAIAHVGSNGPALAGFSLQHHTPKATRPIEHRVCDASRSFRSTKSEPCSWVTVEGHDGGGTLLLTDDWGFLASDDGFRKFGFIGCTEEGRHNVVSN