In [1]:
import re
import random
# Lookup table from RNA codon (three nucleotides) to one-letter amino-acid
# code; the three stop codons map to the string "STOP".
# Written as (amino acid, space-separated codons) groups and flattened into a
# codon -> amino acid dictionary.
Codons = {
    codon: amino_acid
    for amino_acid, codon_group in (
        ("A", "GCU GCC GCA GCG"),
        ("F", "UUU UUC"),
        ("L", "UUA UUG CUU CUC CUA CUG"),
        ("I", "AUU AUC AUA"),
        ("M", "AUG"),
        ("V", "GUU GUC GUA GUG"),
        ("S", "UCU UCC UCA UCG"),
        ("P", "CCU CCC CCA CCG"),
        ("T", "ACU ACC ACA ACG"),
        ("Y", "UAU UAC"),
        ("STOP", "UAA UAG UGA"),
        ("H", "CAU CAC"),
        ("Q", "CAA CAG"),
        ("N", "AAU AAC"),
        ("K", "AAA AAG"),
        ("D", "GAU GAC"),
        ("E", "GAA GAG"),
        ("C", "UGU UGC"),
        ("W", "UGG"),
        ("R", "CGU CGC CGA CGG AGA AGG"),
        ("S", "AGU AGC"),  # serine's second codon family
        ("G", "GGU GGC GGA GGG"),
    )
    for codon in codon_group.split()
}


In [5]:
class RNA:
    """Stores a random RNA sequence, its translation, and
    secondary-structure frequencies.

    Attributes:
        RNA_sequence: Nucleotide string over the letters A, C, G, U;
            empty until create() is called.
        protein: Translation of RNA_sequence. It is an empty string until
            create() is called, after which it is a list with one entry
            per complete codon ("STOP" for stop codons).
        falpha: Frequency of alpha helices (never assigned by create()).
        fbeta: Frequency of beta sheets (never assigned by create()).
    """
    RNA_sequence = ""
    protein = ""
    falpha = 0  # frequency of alpha helices
    fbeta = 0   # frequency of beta sheets

    def create(self, length=1000):
        """Generate a random RNA sequence and translate it into a protein.

        Every call replaces the previously stored sequence and protein, so
        calling create() again on the same object yields a fresh sequence of
        exactly ``length`` nucleotides rather than a longer, appended one.

        Args:
            length: Number of nucleotides to generate. Trailing nucleotides
                that do not fill a complete codon are not translated.
        """
        # Draw the nucleotides uniformly at random and join them in one pass.
        self.RNA_sequence = "".join(
            random.choices(['A', 'C', 'G', 'U'], k=length)
        )
        seq = self.RNA_sequence
        # Walk the sequence in non-overlapping triplets (codons) and look up
        # the amino acid for each; the stop bound skips an incomplete tail.
        self.protein = [
            Codons.get(seq[start:start + 3])
            for start in range(0, len(seq) - 2, 3)
        ]

In [6]:
# Build an RNA object and fill it with a random sequence of the default
# length (1000 nucleotides) together with its translation.
A= RNA()
A.create()

In [7]:
# Display the generated nucleotide string.
A.RNA_sequence

'GACAUUCCGCUCUUCGAAGUUAAACACUUCGCACUUGAGGCUAAGUCGAGGGAAUGAUGAUCAAACUGGCAGCACGCGACCUAUUUUUUGCCAAAUAGGGGCCAGAUCCUUUCUAUCGCAGAAAGCGAUGCUGGUCCGCUUCACGACGAUGACGGAUAUUCUCCUGUGUGCAACGGAUAUAAGCGCGAUUGCGUUUAUAUCGGCAAAGUACCUAAGUUUAAUCUCACCUGUGGUGUAUCUAGGAGCCCUAGUUGACAGUCCUGACAUAUAUAAGUGACCCGCCCAAGGCCUCCUUCGCAACACUCGCUGAGAGUACACUUACCAUAUGAACGUAGAUCUUCGGGUAUUUAUAUAUGCGUUGUCGAGAAUUGCUGACCGGAGCGUCUAUAGGUCAACACCCCUCUUGGGCCUUGCAGUGUAAGUAGACGAAACUAUGGCGGAGUGAGCUCGGGUGCUGACGUAAAUCUGUGCGACGGAGCAUCAAGACUAGGUAAAACUUCACAUAGUAUCCUUAACAUUGGUGGAUACGCGCGUAUUAGAUCUUGAAGCAAGUUUGGCGUCACACAGAAGCCCAGUUCCAUGGCGUAACUACCAGUCUGUGUCGUGCUUCUCUGCUCCUAAAAGUCCAAUCCCAUUACUUGAUAGACACCGGUAGACUCUGCAUAGGUUAGGACAUCGGACGUGCUUCGAUCGUUAGCUAUGCAUGCAAGGGUGAGGUUUGCCUUUCGGUAGUUAAGAAAACUUCCUGACGAUCCAUCGGCGCAAUAUGAUCGCACAGAUUUAUUAGCAGGAACAUGCACCGAUUUGGAUAGAUGAUGCGAGGCGUAGAGCAUAGAAAAUGUUAUGACUAGAUCACUAGUCCGGCAUUUAUAAGUAUCGAUGAGCGCUAGAGGCAAUAUGAUCCUACAUGUGAUAUUAUGACAAGGAGGGAUUUAAUCGUAUCGACAUAUCGAGAUGUACAUUCCAGUCUGUCGUCUUUUUGCUCACAGCCCUUCCGGA

In [8]:
# Sanity check: the sequence length should equal the `length` passed to
# create() (1000 by default).
len(A.RNA_sequence)

1000

In [9]:
# Display the translation: one list entry per complete codon, with "STOP"
# marking stop codons. A trailing partial codon is not translated.
A.protein

['D',
 'I',
 'P',
 'L',
 'F',
 'E',
 'V',
 'K',
 'H',
 'F',
 'A',
 'L',
 'E',
 'A',
 'K',
 'S',
 'R',
 'E',
 'STOP',
 'STOP',
 'S',
 'N',
 'W',
 'Q',
 'H',
 'A',
 'T',
 'Y',
 'F',
 'L',
 'P',
 'N',
 'R',
 'G',
 'Q',
 'I',
 'L',
 'S',
 'I',
 'A',
 'E',
 'S',
 'D',
 'A',
 'G',
 'P',
 'L',
 'H',
 'D',
 'D',
 'D',
 'G',
 'Y',
 'S',
 'P',
 'V',
 'C',
 'N',
 'G',
 'Y',
 'K',
 'R',
 'D',
 'C',
 'V',
 'Y',
 'I',
 'G',
 'K',
 'V',
 'P',
 'K',
 'F',
 'N',
 'L',
 'T',
 'C',
 'G',
 'V',
 'S',
 'R',
 'S',
 'P',
 'S',
 'STOP',
 'Q',
 'S',
 'STOP',
 'H',
 'I',
 'STOP',
 'V',
 'T',
 'R',
 'P',
 'R',
 'P',
 'P',
 'S',
 'Q',
 'H',
 'S',
 'L',
 'R',
 'V',
 'H',
 'L',
 'P',
 'Y',
 'E',
 'R',
 'R',
 'S',
 'S',
 'G',
 'I',
 'Y',
 'I',
 'C',
 'V',
 'V',
 'E',
 'N',
 'C',
 'STOP',
 'P',
 'E',
 'R',
 'L',
 'STOP',
 'V',
 'N',
 'T',
 'P',
 'L',
 'G',
 'P',
 'C',
 'S',
 'V',
 'S',
 'R',
 'R',
 'N',
 'Y',
 'G',
 'G',
 'V',
 'S',
 'S',
 'G',
 'A',
 'D',
 'V',
 'N',
 'L',
 'C',
 'D',
 'G',
 'A',
 'S',
 'R',
 'L',
 

In [21]:
# Scratch sample: 100 nucleotides drawn uniformly at random, as a list of
# one-character strings.
sampi = random.choices("ACGU", k=100)

In [22]:
# Show the raw list of sampled nucleotides.
print(sampi)

['U', 'C', 'G', 'A', 'U', 'U', 'U', 'A', 'G', 'A', 'A', 'U', 'U', 'G', 'G', 'C', 'C', 'C', 'C', 'G', 'U', 'U', 'G', 'C', 'C', 'G', 'A', 'C', 'C', 'C', 'C', 'G', 'A', 'A', 'C', 'A', 'U', 'A', 'C', 'A', 'U', 'C', 'A', 'A', 'A', 'A', 'C', 'G', 'C', 'A', 'A', 'U', 'A', 'U', 'G', 'C', 'C', 'A', 'G', 'A', 'G', 'A', 'U', 'A', 'U', 'A', 'U', 'C', 'U', 'A', 'U', 'C', 'A', 'U', 'C', 'C', 'U', 'A', 'U', 'U', 'A', 'A', 'C', 'U', 'C', 'C', 'U', 'G', 'G', 'A', 'G', 'C', 'C', 'A', 'U', 'A', 'A', 'G', 'C', 'C']


In [23]:
# Collapse the sampled nucleotide list into one string and print it.
# str.join builds the result in a single pass instead of re-copying the
# string on every loop iteration.
# NOTE(review): this rebinds the name `RNA` from the RNA class to a str, so
# calling RNA() after this cell fails until the class cell is re-run. The
# name is kept in case later cells read this string -- consider renaming.
RNA = "".join(sampi)
print(RNA)
    

UCGAUUUAGAAUUGGCCCCGUUGCCGACCCCGAACAUACAUCAAAACGCAAUAUGCCAGAGAUAUAUCUAUCAUCCUAUUAACUCCUGGAGCCAUAAGCC
