# Análise de homologias por BLAST

In [1]:
# blast
from Bio.Blast import NCBIXML 
from Bio.Blast import NCBIWWW 

In [2]:
help(NCBIWWW.qblast)

Help on function qblast in module Bio.Blast.NCBIWWW:

qblast(program, database, sequence, url_base='https://blast.ncbi.nlm.nih.gov/Blast.cgi', auto_format=None, composition_based_statistics=None, db_genetic_code=None, endpoints=None, entrez_query='(none)', expect=10.0, filter=None, gapcosts=None, genetic_code=None, hitlist_size=50, i_thresh=None, layout=None, lcase_mask=None, matrix_name=None, nucl_penalty=None, nucl_reward=None, other_advanced=None, perc_ident=None, phi_pattern=None, query_file=None, query_believe_defline=None, query_from=None, query_to=None, searchsp_eff=None, service=None, threshold=None, ungapped_alignment=None, word_size=None, short_query=None, alignments=500, alignment_view=None, descriptions=500, entrez_links_new_window=None, expect_low=None, expect_high=None, format_entrez_query=None, format_object=None, format_type='XML', ncbi_gi=None, results_file=None, show_overview=None, megablast=None, template_type=None, template_length=None)
    BLAST search using NCBI's

# BLAST

In [3]:
def blast(accession : str, program : str, database : str, filename : str):
    """
    função que executa um blast de uma sequência do NCBI e guarda o resultado
    recebe o identificador da seq no NCBI, o tipo de programa de blast, a base de dados usada 
    e o nome do ficheiro que queremos guardar a informação
    """
    result_handle = NCBIWWW.qblast(program, database, accession)

    save_file = open(filename, "w")
    save_file.write(result_handle.read())
    save_file.close()
    result_handle.close()

    result_handle = open(filename)

    blast_record = NCBIXML.read(result_handle)

    return blast_record

In [4]:
flg_blast_record = blast("60097902", "blastp", "swissprot", "flg_protein_blast")

In [5]:
def blast_analysis(blast_record, e_value_threshold : float, coverage_threshold : float):
    print(len(blast_record.alignments), "hits")
    filtered_alignments = []
    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            coverage = hsp.align_length / blast_record.query_length
            if hsp.expect <= e_value_threshold and coverage >= coverage_threshold:
                print('\n****Alignment****')
                print('acession:', alignment.accession)
                print('title:', alignment.title)
                print('alignment length:', alignment.length)
                print('e value:', hsp.expect)
                #print('hsp length:', hsp.align_length)
                #print('hsps:', len(alignment.hsps))
                filtered_alignments.append(alignment)
    return filtered_alignments

In [6]:
flg_filtered_blast = blast_analysis(flg_blast_record, 0.05, 0)
flg_filtered_blast
#len(flg_filtered_blast)

22 hits

****Alignment****
acession: P20930
title: sp|P20930.3| RecName: Full=Filaggrin [Homo sapiens]
alignment length: 4061
e value: 0.0

****Alignment****
acession: Q8VHD8
title: sp|Q8VHD8.1| RecName: Full=Hornerin [Mus musculus]
alignment length: 2496
e value: 7.52798e-46

****Alignment****
acession: Q86YZ3
title: sp|Q86YZ3.2| RecName: Full=Hornerin [Homo sapiens]
alignment length: 2850
e value: 3.60979e-38

****Alignment****
acession: Q5D862
title: sp|Q5D862.1| RecName: Full=Filaggrin-2; Short=FLG-2; AltName: Full=Intermediate filament-associated and psoriasis-susceptibility protein; Short=Ifapsoriasin [Homo sapiens]
alignment length: 2391
e value: 7.59274e-38

****Alignment****
acession: Q5D862
title: sp|Q5D862.1| RecName: Full=Filaggrin-2; Short=FLG-2; AltName: Full=Intermediate filament-associated and psoriasis-susceptibility protein; Short=Ifapsoriasin [Homo sapiens]
alignment length: 2391
e value: 2.10116e-30

****Alignment****
acession: Q5D862
title: sp|Q5D862.1| RecName: Fu

[<Bio.Blast.Record.Alignment at 0x27f830f8940>,
 <Bio.Blast.Record.Alignment at 0x27f830f8eb0>,
 <Bio.Blast.Record.Alignment at 0x27f830f8b20>,
 <Bio.Blast.Record.Alignment at 0x27f830f8f40>,
 <Bio.Blast.Record.Alignment at 0x27f830f8f40>,
 <Bio.Blast.Record.Alignment at 0x27f830f8f40>,
 <Bio.Blast.Record.Alignment at 0x27f830f8f40>,
 <Bio.Blast.Record.Alignment at 0x27f831090a0>,
 <Bio.Blast.Record.Alignment at 0x27f831091f0>,
 <Bio.Blast.Record.Alignment at 0x27f83109100>,
 <Bio.Blast.Record.Alignment at 0x27f83109250>,
 <Bio.Blast.Record.Alignment at 0x27f831092e0>,
 <Bio.Blast.Record.Alignment at 0x27f83109370>,
 <Bio.Blast.Record.Alignment at 0x27f83109400>,
 <Bio.Blast.Record.Alignment at 0x27f83109490>,
 <Bio.Blast.Record.Alignment at 0x27f83109520>,
 <Bio.Blast.Record.Alignment at 0x27f831095b0>,
 <Bio.Blast.Record.Alignment at 0x27f83109640>,
 <Bio.Blast.Record.Alignment at 0x27f831096d0>,
 <Bio.Blast.Record.Alignment at 0x27f83109760>,
 <Bio.Blast.Record.Alignment at 0x27f831

**Não esquecer de ver a abrangencia taxonomica e os dominios conservados das seqs homologas e comparar com a query**