In [2]:
from Bio import Entrez
from Bio import SeqIO

In [13]:
# Set the contact e-mail on the Entrez module before making any request.
Entrez.email = "...@gmail.com"
# Request the GenBank-format text of nucleotide entry 1049011034.
handle = Entrez.efetch(db="nucleotide", rettype="gb", retmode="text", id="1049011034")
try:
    # Print a short summary for every record found in the response.
    for seq_record in SeqIO.parse(handle, "gb"):
        print (seq_record.id, seq_record.description)
        print ("Sequence length: ", len(seq_record))
        print (len(seq_record.features), " features" )
        print ("from: ", seq_record.annotations["source"] )
finally:
    # Close the handle even when parsing or printing raises
    # (e.g. a record that has no "source" annotation).
    handle.close()

AH002947.2 Homo sapiens profilaggrin (FLG) gene, complete cds
Sequence length:  4929
11  features
from:  Homo sapiens (human)


In [21]:
# Set the contact e-mail on the Entrez module; access_ncbi below issues Entrez requests.
# The same value is also assigned in an earlier cell.
Entrez.email = "...@gmail.com" 
def access_ncbi(accession : str):
    """
    Fetch one GenBank record from the NCBI "nucleotide" database.

    accession: identifier of the NCBI sequence, passed to Entrez.efetch as `id`
    Returns the record parsed by SeqIO.read from the downloaded GenBank text.
    Any exception raised while parsing propagates to the caller.
    """
    handle = Entrez.efetch(db="nucleotide", id=accession, rettype="gb", retmode="text")
    try:
        return SeqIO.read(handle, "gb")
    finally:
        # Always release the handle, including when SeqIO.read raises.
        handle.close()

def save_genbank_file(record, filename : str):
    """
    Save a record to disk in GenBank format.

    record: the record to write (for example the value returned by access_ncbi)
    filename: name to give the new file
    Returns the value returned by SeqIO.write.
    """
    written = SeqIO.write(record, filename, "genbank")
    return written


In [32]:
# Download the record with NCBI id 1049011034.
flg_record = access_ncbi("1049011034")

# Write it to flg_file.gb; as the last expression of the cell, the value
# returned by save_genbank_file is shown as the cell output.
save_genbank_file(flg_record, "flg_file.gb")

1

In [60]:
def read_genbank_file(filename : str):
    """
    Read a previously created GenBank file.

    filename: name of the GenBank file to read
    Returns the record parsed by SeqIO.read.
    """
    return SeqIO.read(filename, "genbank")

def annotations(record):
    """
    Print the annotations of a GenBank record.

    record: the record obtained by reading the file with read_genbank_file
    Prints the id, name, description, length, source, taxonomy and the full
    annotations mapping; the external database references are printed only
    when the record has at least one.
    """
    # Each entry pairs a label with a lazy getter, so every value is looked up
    # only at the moment its line is printed.
    fields = (
        ("ID: ", lambda: record.id),
        ("Name:", lambda: record.name),
        ("Description: ", lambda: record.description),
        ("Sequence length: ", lambda: len(record)),
        ("from: ", lambda: record.annotations["source"]),
        ("Taxonomy:\n", lambda: record.annotations["taxonomy"]),
        ("General annotations:\n", lambda: record.annotations),
    )
    for label, getter in fields:
        print(label, getter())

    # Nothing more to show when there are no cross-references.
    if len(record.dbxrefs) == 0:
        return
    print("External Databases References:\n", record.dbxrefs)

In [61]:
# Read the record back from flg_file.gb.
flg_file = read_genbank_file("flg_file.gb")

# Print the annotations of the record that was read.
annotations(flg_file)

ID:  AH002947.2
Name: AH002947
Description:  Homo sapiens profilaggrin (FLG) gene, complete cds
Sequence length:  4929
from:  Homo sapiens (human)
Taxonomy:
 ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
General annotations:
 {'molecule_type': 'DNA', 'topology': 'linear', 'data_file_division': 'PRI', 'date': '01-AUG-2016', 'accessions': ['AH002947'], 'sequence_version': 2, 'keywords': ['filaggrin', 'profilaggrin'], 'source': 'Homo sapiens (human)', 'organism': 'Homo sapiens', 'taxonomy': ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo'], 'references': [Reference(title='Characterization of the human epidermal profilaggrin gene. Genomic organization and identification of an S-100-like calcium binding domain at the amin

In [82]:
def features_qualifiers(record):
    """
    Print the features of a record and their qualifiers.

    record: the record obtained by reading the file with read_genbank_file
    Prints the number of features, then the type and location of each one,
    then the qualifiers of each one in the same order.
    """
    print(len(record.features), "features\n")

    print("Type and Location:")
    for entry in record.features:
        print(entry.type, entry.location)

    print("\nQualifiers:")
    for entry in record.features:
        print(entry.qualifiers)

In [83]:
# Print the features and qualifiers of the record read from flg_file.gb.
features_qualifiers(flg_file)

11 features

Type and Location:
source [0:4929](+)
gene [232:4929](+)
regulatory [232:239](+)
exon [259:313](+)
intron order{[313:663](+), [763:2564](+)}
gap [663:763](+)
protein_bind [2376:2384](+)
exon [2564:2723](+)
CDS join{[2585:2723](+), [3293:4929](+)}
intron [2723:3293](+)
exon [3293:4929](+)

Qualifiers:
OrderedDict([('organism', ['Homo sapiens']), ('mol_type', ['genomic DNA']), ('db_xref', ['taxon:9606']), ('map', ['1q21']), ('sex', ['male caucasian']), ('tissue_type', ['placenta']), ('dev_stage', ['adult']), ('tissue_lib', ['Stratagene'])])
OrderedDict([('gene', ['FLG'])])
OrderedDict([('regulatory_class', ['TATA_box']), ('gene', ['FLG']), ('note', ['G00-119-912; putative'])])
OrderedDict([('gene', ['FLG']), ('experiment', ['experimental evidence, no additional details recorded']), ('note', ['G00-119-912']), ('number', ['1'])])
OrderedDict([('gene', ['FLG']), ('note', ['G00-119-912']), ('number', ['1'])])
OrderedDict([('estimated_length', ['unknown'])])
OrderedDict([('gene',