# Análise de homologias por BLAST

In [3]:
from Bio.Blast import NCBIXML 
from Bio.Blast import NCBIWWW 

In [4]:
def blast(accession : str, program : str, database : str, filename : str):
    """
    Run a remote NCBI BLAST search for a sequence and save the raw result.

    The result returned by ``NCBIWWW.qblast`` is written to ``filename`` and
    then parsed back from that file with ``NCBIXML.read``.

    Parameters
    ----------
    accession : str
        Identifier of the query sequence at NCBI, passed to ``qblast`` as the query.
    program : str
        BLAST program to run (e.g. ``"blastp"``).
    database : str
        Database to search (e.g. ``"swissprot"``).
    filename : str
        Path of the file where the raw result is stored (overwritten if present).

    Returns
    -------
    The record produced by ``NCBIXML.read`` from the saved file.
    """
    result_handle = NCBIWWW.qblast(program, database, accession)
    try:
        # Persist the raw result so it can be inspected without re-querying NCBI.
        with open(filename, "w") as save_file:
            save_file.write(result_handle.read())
    finally:
        # Release the remote handle even if writing the file fails.
        result_handle.close()

    # Parse from the saved copy; the context manager closes the file afterwards.
    with open(filename) as saved_handle:
        blast_record = NCBIXML.read(saved_handle)

    return blast_record

In [5]:
def blast_filter(blast_record, e_value_threshold : float, coverage_threshold : float, per_identity_threshold : float):
    """
    Select the alignments of a BLAST record that pass all three thresholds.

    An alignment is kept when at least one of its HSPs satisfies:

    - ``hsp.expect <= e_value_threshold``
    - query coverage (%) ``>= coverage_threshold``, computed as
      ``(query_end - query_start + 1) / blast_record.query_length * 100``
    - percent identity ``>= per_identity_threshold``, computed as
      ``identities / align_length * 100``

    A short summary is printed for every kept alignment. Each alignment is
    reported and returned at most once, even when several of its HSPs qualify,
    so ``len()`` of the result is the number of distinct filtered sequences.

    Parameters
    ----------
    blast_record
        Parsed BLAST record exposing ``alignments`` and ``query_length``.
    e_value_threshold : float
        Maximum accepted e-value.
    coverage_threshold : float
        Minimum accepted query coverage, in percent.
    per_identity_threshold : float
        Minimum accepted identity, in percent.

    Returns
    -------
    list
        The alignments that passed, in their original order.
    """
    filtered_alignments = []
    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            e_value = hsp.expect
            coverage = (hsp.query_end - hsp.query_start + 1) / blast_record.query_length * 100
            per_identity = (hsp.identities / hsp.align_length) * 100
            if e_value <= e_value_threshold and coverage >= coverage_threshold and per_identity >= per_identity_threshold:
                print('\n****Alignment****')
                print('acession:', alignment.accession)
                print('title:', alignment.title)
                print('alignment length:', alignment.length)
                print('e value:', hsp.expect)
                filtered_alignments.append(alignment)
                # One qualifying HSP is enough; stop here so the same
                # alignment is not appended again for its remaining HSPs.
                break
    return filtered_alignments

## Gene FLG

BLAST contra a base de dados swissprot

In [16]:
# BLASTP of the FLG query (NCBI identifier 60097902) against swissprot;
# the raw result is saved to the file "flg_protein_blast".
flg_swissprot_blast_record = blast(
    accession="60097902",
    program="blastp",
    database="swissprot",
    filename="flg_protein_blast",
)

In [17]:
# Report how many alignments (hits) the FLG search returned.
flg_hits = flg_swissprot_blast_record.alignments
print(len(flg_hits), "hits")

22 hits


In [20]:
# Keep FLG hits with e-value <= 0.05, query coverage >= 80 % and identity >= 80 %.
flg_filtered_blast = blast_filter(
    flg_swissprot_blast_record,
    e_value_threshold=0.05,
    coverage_threshold=80,
    per_identity_threshold=80,
)
flg_filtered_blast


****Alignment****
acession: P20930
title: sp|P20930.3| RecName: Full=Filaggrin [Homo sapiens]
alignment length: 4061
e value: 0.0


[<Bio.Blast.Record.Alignment at 0x292ac4b4a60>]

In [21]:
# Print the number of filtered FLG hits, using the singular label only for exactly one.
print(
    len(flg_filtered_blast),
    "filtered sequence" if len(flg_filtered_blast) == 1 else "filtered sequences",
)

1 filtered sequence


BLAST contra a base de dados nr (non-redundant)

**Não esquecer de ver a abrangência taxonómica e os domínios conservados das sequências homólogas e comparar com a query**

## Gene C11ORF30

BLAST contra a base de dados swissprot

In [25]:
# BLASTP of the C11ORF30 query (NCBI identifier 47605660) against swissprot;
# the raw result is saved to the file "c11orf30_protein_blast".
c11orf30_swissprot_blast_record = blast(
    accession="47605660",
    program="blastp",
    database="swissprot",
    filename="c11orf30_protein_blast",
)

In [26]:
# Report how many alignments (hits) the C11ORF30 search returned.
c11orf30_hits = c11orf30_swissprot_blast_record.alignments
print(len(c11orf30_hits), "hits")

3 hits


In [27]:
# Keep C11ORF30 hits with e-value <= 0.05, query coverage >= 80 % and identity >= 80 %.
c11orf30_filtered_blast = blast_filter(
    c11orf30_swissprot_blast_record,
    e_value_threshold=0.05,
    coverage_threshold=80,
    per_identity_threshold=80,
)
c11orf30_filtered_blast


****Alignment****
acession: Q7Z589
title: gi|47605660|sp|Q7Z589.2|EMSY_HUMAN RecName: Full=BRCA2-interacting transcriptional repressor EMSY
alignment length: 1322
e value: 0.0

****Alignment****
acession: Q8BMB0
title: sp|Q8BMB0.2| RecName: Full=BRCA2-interacting transcriptional repressor EMSY [Mus musculus]
alignment length: 1264
e value: 0.0


[<Bio.Blast.Record.Alignment at 0x292ae8319a0>,
 <Bio.Blast.Record.Alignment at 0x292ae7fac70>]

In [None]:
# Print the number of filtered C11ORF30 hits, using the singular label only for exactly one.
print(
    len(c11orf30_filtered_blast),
    "filtered sequence" if len(c11orf30_filtered_blast) == 1 else "filtered sequences",
)

BLAST contra a base de dados nr (non-redundant)

## Gene TSLP

BLAST contra a base de dados swissprot

In [6]:
# BLASTP of the TSLP query (NCBI identifier AAK67490) against swissprot;
# the raw result is saved to the file "tslp_protein_blast".
tslp_swissprot_blast_record = blast(
    accession="AAK67490",
    program="blastp",
    database="swissprot",
    filename="tslp_protein_blast",
)

In [None]:
# Report how many alignments (hits) the TSLP search returned.
tslp_hits = tslp_swissprot_blast_record.alignments
print(len(tslp_hits), "hits")

NameError: name 'tslp_swissprot_blast_record' is not defined

In [None]:
# Keep TSLP hits with e-value <= 0.05, query coverage >= 80 % and identity >= 80 %.
tslp_filtered_blast = blast_filter(
    tslp_swissprot_blast_record,
    e_value_threshold=0.05,
    coverage_threshold=80,
    per_identity_threshold=80,
)
tslp_filtered_blast

In [None]:
# Print the number of filtered TSLP hits, using the singular label only for exactly one.
print(
    len(tslp_filtered_blast),
    "filtered sequence" if len(tslp_filtered_blast) == 1 else "filtered sequences",
)

BLAST contra a base de dados nr (non-redundant)