## Exemplos de BioPython - módulo *Seq*

Carregar os packages

In [1]:
from Bio.Data import CodonTable
from Bio.Seq import Seq
# NOTE(review): GC is not used in the cells reviewed; newer Biopython releases
# appear to replace it with gc_fraction — confirm before upgrading.
from Bio.SeqUtils import GC

Criar sequências de DNA e proteínas

In [2]:
# Build a Seq object from a plain string of nucleotide letters.
my_dna = Seq("AGTACACTGGT") 
# A bare name on the last line of the cell displays the object's repr.
my_dna

Seq('AGTACACTGGT')

In [3]:
# print() writes only the sequence letters, without the Seq('...') wrapper.
print(my_dna)

AGTACACTGGT


In [4]:
# The same Seq constructor is used here for a protein (amino-acid letters).
my_prot = Seq("YAGSBHSYA")
# Bare name on the last line: displays the repr of the protein sequence.
my_prot

Seq('YAGSBHSYA')

In [5]:
# Printing the protein sequence shows just its letters.
print(my_prot)

YAGSBHSYA


Iterar sobre sequências

In [6]:
# A Seq is iterable: each pass of the loop yields one letter.
for nucleotide in my_dna:
    print(nucleotide)

A
G
T
A
C
A
C
T
G
G
T


In [7]:
# enumerate() pairs every letter with its zero-based position.
for position, base in enumerate(my_dna):
    print(position, base)

0 A
1 G
2 T
3 A
4 C
5 A
6 C
7 T
8 G
9 G
10 T


Comprimento e contagens - exemplo de cálculo do conteúdo GC

In [8]:
# Count the occurrences of a single letter and of a two-letter pattern.
for pattern in ("G", "GT"):
    print(my_dna.count(pattern))

3
2


In [9]:
# Longer DNA sequence reused by the cells that follow.
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC")
# len() gives the number of letters in the sequence.
len(my_seq)

32

In [10]:
# GC content: the share of G and C letters in the sequence, as a percentage.
print("GC content")
gc_total = my_seq.count("G") + my_seq.count("C")
print(100 * float(gc_total) / len(my_seq), " %")

GC content
46.875  %


Indexação e slicing - semelhante a lists e strings

In [11]:
# Seq accepts the same indexing and slicing syntax as str and list.
print(my_seq[2])  # single letter at index 2 (zero-based)
print(my_seq[4:12])  # letters at indices 4 through 11
print(my_seq[-2])  # second letter counting from the end
print(my_seq[::3])  # every third letter, starting at index 0
print(my_seq[1::3])  # every third letter, starting at index 1

T
GATGGGCC
G
GCTGTAGTAAG
AGGCATGCATC


Conversão para string (podendo depois usar-se todas as funções aí definidas)

In [12]:
# Convert once to a plain str, then work with str methods on the result.
st_seq = str(my_seq)
print(st_seq)
# str.index() gives the position of the first match.
# NOTE(review): the original comment stated that Seq has no index(); recent
# Biopython releases appear to provide Seq.index() — confirm for the
# installed version.
print(st_seq.index("TAT"))

GATCGATGGGCCTATATAGGATCGAAAATCGC
12


Funções para procura de padrões

In [13]:
# find() reports the start index of the first match, or -1 when the pattern
# is absent (as with "TAC" here).
for motif in ("TAC", "TATA", "TA"):
    print(my_seq.find(motif))
# rfind() reports the start index of the last match instead.
print(my_seq.rfind("TA"))
# The "in" operator tests whether a subsequence occurs at all.
print("GTAC" in my_seq)

-1
12
12
16
False


Conversão para minúsculas/maiúsculas

In [14]:
# lower() gives the sequence in lower-case letters (displayed as the cell output).
my_seq.lower()
## upper() is also available

Seq('gatcgatgggcctatataggatcgaaaatcgc')

Comparação de sequências

In [15]:
# Two distinct Seq objects holding the same letters.
seq1 = Seq("ACGT")
seq2 = Seq("ACGT")

# "==" compares content, just like comparing the underlying strings.
print(seq1 == seq2)
print(str(seq1) == str(seq2))

# "is" checks object identity: seq2 is a separate object, whereas seq3 is
# only another name bound to the seq1 object.
seq3 = seq1
print(seq1 is seq2)
print(seq3 is seq1)

True
True
False
True


Concatenação de sequências

In [16]:
# "+" joins two sequences end to end; seq1 and seq2 are rebound here to
# two protein fragments.
seq1, seq2 = Seq("EVRNAK"), Seq("GHERW")
print(seq1 + seq2)

EVRNAKGHERW


Complemento inverso de DNA

In [17]:
# Coding strand reused by the transcription and translation cells below.
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTAG") 
# reverse_complement() complements every base and reverses the order.
template_dna = coding_dna.reverse_complement()
print(template_dna)

CTAGCGGCCCATTACAATGGCCAT


Transcrição e transcrição reversa

In [18]:
# transcribe(): DNA -> RNA; the printed result has U wherever the DNA has T.
messenger_rna = coding_dna.transcribe() 
print(messenger_rna)

# back_transcribe(): RNA -> DNA; the printed result has T wherever the RNA has U.
rna_seq = Seq('CGUUUAACU')
print(rna_seq.back_transcribe())

AUGGCCAUUGUAAUGGGCCGCUAG
CGTTTAACT


Tradução de DNA e RNA

In [19]:
# translate() is called on both a DNA and an RNA sequence; the "*" in the
# first result marks a stop codon.
for nucleic_acid in (coding_dna, rna_seq):
    print(nucleic_acid.translate())

MAIVMGR*
RLT


Tabelas de tradução

In [20]:
# CodonTable is imported in the imports cell at the top of the notebook.
# Look up two codon tables by name.
standard_table = CodonTable.unambiguous_dna_by_name["Standard"]
mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"]

# Printing a table renders its full codon grid.
print(standard_table)
print(mito_table)
# stop_codons holds the table's stop codons; forward_table is indexed by
# codon to look up the encoded amino acid.
print(mito_table.stop_codons)
print(mito_table.forward_table["ACG"])

# Translate the same DNA without a table argument and with a named table.
# NOTE(review): the final codon AGA is R in the standard table; it is
# presumably read differently by the mitochondrial table — check the output.
coding_dna2 = Seq("ATGGATACCAGA")
print(coding_dna2.translate())
print(coding_dna2.translate(table="Vertebrate Mitochondrial"))

Table 1 Standard, SGC0

  |  T      |  C      |  A      |  G      |
--+---------+---------+---------+---------+--
T | TTT F   | TCT S   | TAT Y   | TGT C   | T
T | TTC F   | TCC S   | TAC Y   | TGC C   | C
T | TTA L   | TCA S   | TAA Stop| TGA Stop| A
T | TTG L(s)| TCG S   | TAG Stop| TGG W   | G
--+---------+---------+---------+---------+--
C | CTT L   | CCT P   | CAT H   | CGT R   | T
C | CTC L   | CCC P   | CAC H   | CGC R   | C
C | CTA L   | CCA P   | CAA Q   | CGA R   | A
C | CTG L(s)| CCG P   | CAG Q   | CGG R   | G
--+---------+---------+---------+---------+--
A | ATT I   | ACT T   | AAT N   | AGT S   | T
A | ATC I   | ACC T   | AAC N   | AGC S   | C
A | ATA I   | ACA T   | AAA K   | AGA R   | A
A | ATG M(s)| ACG T   | AAG K   | AGG R   | G
--+---------+---------+---------+---------+--
G | GTT V   | GCT A   | GAT D   | GGT G   | T
G | GTC V   | GCC A   | GAC D   | GGC G   | C
G | GTA V   | GCA A   | GAA E   | GGA G   | A
G | GTG V   | GCG A   | GAG E   | GGG G   | G
--+---------