# Biopython : Sequence Record

* SeqRecord : 서열 외에 다른 정보들도 관리할 수 있는 객체

In [1]:
from Bio.Seq import Seq 
from Bio.SeqRecord import SeqRecord

In [2]:
# Wrap a bare sequence in a SeqRecord; only the sequence itself is supplied here.
seq = Seq("TATAAAGGCAATATGCAGTAG")
seqRec = SeqRecord(seq=seq)
seqRec

SeqRecord(seq=Seq('TATAAAGGCAATATGCAGTAG'), id='<unknown id>', name='<unknown name>', description='<unknown description>', dbxrefs=[])

In [3]:
# Rebuild the record from the same sequence, this time supplying an id and a name.
seqRec = SeqRecord(
    seq=seq,
    id="NBCC01",
    name="Test01",
)
seqRec

SeqRecord(seq=Seq('TATAAAGGCAATATGCAGTAG'), id='NBCC01', name='Test01', description='<unknown description>', dbxrefs=[])

In [4]:
# Change an attribute of the SeqRecord object (here: its name), then display it
seqRec.name = 'DNA001'
seqRec

SeqRecord(seq=Seq('TATAAAGGCAATATGCAGTAG'), id='NBCC01', name='DNA001', description='<unknown description>', dbxrefs=[])

In [5]:
# Overwrite the identifying fields of the existing record.
seqRec.id, seqRec.name = "NC_1111", "GeneA"
seqRec.description = "This is a description."

# Attach two free-form key/value annotations in one step.
seqRec.annotations.update(
    {
        "Annotation_Key1": "Annotation_Value1",
        "Annotation_Key2": "Annotation_Value2",
    }
)

# print() gives the multi-line, human-readable summary rather than the repr.
print(seqRec)

ID: NC_1111
Name: GeneA
Description: This is a description.
Number of features: 0
/Annotation_Key1=Annotation_Value1
/Annotation_Key2=Annotation_Value2
Seq('TATAAAGGCAATATGCAGTAG')


### file 읽어서 SeqRecord 만들기

In [6]:
from Bio import SeqIO 

In [7]:
# FASTA: load the file with SeqIO.read, then show the object's type and summary
record = SeqIO.read("data/J01636.1.fasta", format="fasta")
print(type(record), record, sep="\n")

<class 'Bio.SeqRecord.SeqRecord'>
ID: J01636.1
Name: J01636.1
Description: J01636.1 E.coli lactose operon with lacI, lacZ, lacY and lacA genes
Number of features: 0
Seq('GACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCGGAA...GAC', SingleLetterAlphabet())


In [8]:
# GenBank: load the .gbk file the same way; `record` is reused by the cells below
record = SeqIO.read("data/J01636.1.gbk", format="genbank")
print(type(record), record, sep="\n")

<class 'Bio.SeqRecord.SeqRecord'>
ID: J01636.1
Name: ECOLAC
Description: E.coli lactose operon with lacI, lacZ, lacY and lacA genes
Number of features: 22
/molecule_type=DNA
/topology=linear
/data_file_division=BCT
/date=05-MAY-1993
/accessions=['J01636', 'J01637', 'K01483', 'K01793']
/sequence_version=1
/keywords=['acetyltransferase', 'beta-D-galactosidase', 'galactosidase', 'lac operon', 'lac repressor protein', 'lacA gene', 'lacI gene', 'lacY gene', 'lacZ gene', 'lactose permease', 'mutagenesis', 'palindrome', 'promoter region', 'thiogalactoside acetyltransferase']
/source=Escherichia coli
/organism=Escherichia coli
/taxonomy=['Bacteria', 'Proteobacteria', 'Gammaproteobacteria', 'Enterobacterales', 'Enterobacteriaceae', 'Escherichia']
/references=[Reference(title='The nucleotide sequence of the lac operator', ...), Reference(title='The nucleotide sequence of the lactose messenger ribonucleic acid transcribed from the UV5 promoter mutant of Escherichia coli', ...), Reference(title='S

In [9]:
# Count the entries in the record's feature list
len(record.features)

22

In [10]:
# List the annotation keys available on the record
record.annotations.keys()

dict_keys(['molecule_type', 'topology', 'data_file_division', 'date', 'accessions', 'sequence_version', 'keywords', 'source', 'organism', 'taxonomy', 'references', 'comment'])

In [11]:
# Look up a single annotation value by its key
record.annotations['organism']

'Escherichia coli'

#### SeqRecord 객체 간 비교 : 속성끼리 비교해야 함

In [12]:
# Two independent Seq objects with identical content, each wrapped in its own record.
seq1, seq2 = Seq("ACGT"), Seq("ACGT")
record1, record2 = SeqRecord(seq1), SeqRecord(seq2)

In [13]:
# Comparing two SeqRecord objects with == raises NotImplementedError.
# The exception is the point of this cell, so catch it and print it: the
# message is still shown, and Restart & Run All can continue past this cell.
try:
    print(record1 == record2)
except NotImplementedError as err:
    print(f"{type(err).__name__}: {err}")

NotImplementedError: SeqRecord comparison is deliberately not implemented. Explicitly compare the attributes of interest.

In [14]:
# Compare the attribute of interest (the sequences) rather than the records themselves
print(record1.seq == record2.seq)

True
