# Bio.PDB 모듈

In [1]:
from Bio import PDB

In [2]:
# Download the three PDB entries used in this notebook into ./data
# (file_format='pdb' requests the plain PDB text format).
repository = PDB.PDBList()
repository.retrieve_pdb_file('1TUP', pdir='./data', file_format='pdb')
repository.retrieve_pdb_file('1OLG', pdir='./data', file_format='pdb')
# This is the last expression of the cell, so its return value (the local
# file path) is echoed as the cell output.
repository.retrieve_pdb_file('1YCQ', pdir='./data', file_format='pdb')

Structure exists: './data\pdb1tup.ent' 
Structure exists: './data\pdb1olg.ent' 
Structure exists: './data\pdb1ycq.ent' 


'./data\\pdb1ycq.ent'

In [3]:
# Parse each downloaded file into a structure object.
# get_structure(label, path): the first argument is the label attached to the
# structure, the second is the file to read.
parser = PDB.PDBParser()
p53_1tup = parser.get_structure('P 53 - DNA Binding', './data/pdb1tup.ent')
p53_1olg = parser.get_structure('P 53 - Tetramerization', './data/pdb1olg.ent')
p53_1ycq = parser.get_structure('P 53 - Transactivation', './data/pdb1ycq.ent')



In [4]:
# Inspect the header information
def print_pdb_headers(headers, indent=0):
    """Pretty-print a (possibly nested) header mapping.

    Each key is right-aligned in a 20-character column, shifted right by
    ``indent`` spaces.

    * dict values (including dict subclasses such as ``OrderedDict``) are
      printed as a sub-section: a blank line, the key, the nested content
      indented by 4 more spaces, and a trailing blank line.
    * list values (including list subclasses) are printed one element per
      line, each prefixed with ``->``.
    * every other value is printed on the same line as its key.

    Args:
        headers: mapping of header name -> content; only ``.items()`` is used.
        indent: number of leading spaces for this nesting level.

    Returns:
        None. Everything is written to standard output.
    """
    ind_text = ' ' * indent
    for header, content in headers.items():
        # isinstance (not an exact type comparison) so that dict/list
        # subclasses are expanded instead of dumped as a raw repr.
        if isinstance(content, dict):
            print('\n%s%20s:' % (ind_text, header))
            print_pdb_headers(content, indent + 4)
            print()
        elif isinstance(content, list):
            print('%s%20s:' % (ind_text, header))
            for elem in content:
                print('%s%21s %s' % (ind_text, '->', elem))
        else:
            print('%s%20s: %s' % (ind_text, header, content))

# Show the parsed header of the 1TUP structure.
print_pdb_headers(p53_1tup.header)

                name: tumor suppressor p53 complexed with dna
                head: antitumor protein/dna
              idcode: 1TUP
     deposition_date: 1995-07-11
        release_date: 1995-07-11
    structure_method: x-ray diffraction
          resolution: 2.2
 structure_reference:
                   -> n.p.pavletich,k.a.chambers,c.o.pabo the dna-binding domain of p53 contains the four conserved regions and the major mutation hot spots genes dev. v. 7 2556 1993 issn 0890-9369 
                   -> b.vogelstein,k.w.kinzler p53 function and dysfunction cell(cambridge,mass.) v. 70 523 1992 issn 0092-8674 
   journal_reference: y.cho,s.gorina,p.d.jeffrey,n.p.pavletich crystal structure of a p53 tumor suppressor-dna complex: understanding tumorigenic mutations. science v. 265 346 1994 issn 0036-8075 8023157 
              author: Y.Cho,S.Gorina,P.D.Jeffrey,N.P.Pavletich

            compound:

                       1:
                        misc: 
                    molecule: dna (5

In [5]:
# Inspect the chain information stored under each header's 'compound' key.
# For 1TUP: chains A, B, C are protein; chains E, F are DNA.
for structure in (p53_1tup, p53_1olg, p53_1ycq):
    print(structure.header['compound'])

{'1': {'misc': '', 'molecule': "dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') ", 'chain': 'e', 'engineered': 'yes'}, '2': {'misc': '', 'molecule': "dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') ", 'chain': 'f', 'engineered': 'yes'}, '3': {'misc': '', 'molecule': 'protein (p53 tumor suppressor )', 'chain': 'a, b, c', 'engineered': 'yes'}}
{'1': {'misc': '', 'molecule': 'tumor suppressor p53 (oligomerization domain)', 'chain': 'a, b, c, d', 'engineered': 'yes'}}
{'1': {'misc': '', 'molecule': 'mdm2', 'chain': 'a', 'synonym': 'mdm2', 'engineered': 'yes'}, '2': {'misc': '', 'molecule': 'p53', 'chain': 'b', 'fragment': 'residues 13 - 29', 'engineered': 'yes'}}


In [6]:
# Report the number of residues and atoms in each chain
def describe_model(name, pdb):
    """Print one summary line per chain of every model in ``pdb``.

    A blank line is printed first. Then, for each chain, the line contains
    ``name``, the chain id, ``len(chain)`` as the residue count, and the
    number of items yielded by ``chain.get_atoms()`` as the atom count.

    Args:
        name: label printed at the start of every line.
        pdb: iterable of models, each an iterable of chains.

    Returns:
        None. Everything is written to standard output.
    """
    line_format = '%s - Chain: %s. residues: %d. atoms: %d.'
    print()
    all_chains = (chain for model in pdb for chain in model)
    for chain in all_chains:
        atom_count = sum(1 for _ in chain.get_atoms())
        print(line_format % (name, chain.id, len(chain), atom_count))
            
# Summarise every parsed structure with the same helper.
for label, structure in (('1TUP', p53_1tup), ('1OLG', p53_1olg), ('1YCQ', p53_1ycq)):
    describe_model(label, structure)


1TUP - Chain: E. residues: 43. atoms: 442.
1TUP - Chain: F. residues: 35. atoms: 449.
1TUP - Chain: A. residues: 395. atoms: 1734.
1TUP - Chain: B. residues: 265. atoms: 1593.
1TUP - Chain: C. residues: 276. atoms: 1610.

1OLG - Chain: A. residues: 42. atoms: 698.
1OLG - Chain: B. residues: 42. atoms: 698.
1OLG - Chain: C. residues: 42. atoms: 698.
1OLG - Chain: D. residues: 42. atoms: 698.

1YCQ - Chain: A. residues: 123. atoms: 741.
1YCQ - Chain: B. residues: 16. atoms: 100.


In [7]:
# Print the ids of HETATM residues other than water molecules.
# The first field of a residue id is ' ' or 'W' for the entries skipped here;
# anything else is printed.
for residue in p53_1tup.get_residues():
    if residue.id[0] not in (' ', 'W'):
        print(residue.id)

('H_ ZN', 951, ' ')
('H_ ZN', 952, ' ')
('H_ ZN', 953, ' ')


In [8]:
# First amino acid of chain A and its atoms
# Index 0 of the structure, then chain 'A'; next() takes the first residue yielded.
res = next(p53_1tup[0]['A'].get_residues())
print(res)

# List every atom of that residue with its serial number and element.
for atom in res:
    print(atom, atom.serial_number, atom.element)
    
# Direct lookup by key: residue 94 of chain A, then its 'CA' atom.
print(p53_1tup[0]['A'][94]['CA'])

<Residue SER het=  resseq=94 icode= >
<Atom N> 858 N
<Atom CA> 859 C
<Atom C> 860 C
<Atom O> 861 O
<Atom CB> 862 C
<Atom OG> 863 O
<Atom CA>


In [9]:
# 특정 체인을 fasta 파일로 저장

from Bio.SeqIO import PdbIO, FastaIO

def get_fasta(pdb_file, fasta_file, transfer_ids=None):
    """Copy sequence records from a PDB handle into a FASTA handle.

    Records are read with ``PdbIO.PdbSeqresIterator`` and written with
    ``FastaIO.FastaWriter``. A record is kept only when its sequence is
    non-empty and, if ``transfer_ids`` is given, its id is in that collection.
    Each kept record's id, sequence and length are also printed.

    Args:
        pdb_file: handle passed to ``PdbIO.PdbSeqresIterator``.
        fasta_file: handle passed to ``FastaIO.FastaWriter``.
        transfer_ids: collection of record ids to keep, or None to keep
            every non-empty record.

    Note:
        Neither handle is closed here; that is left to the caller.
    """
    writer = FastaIO.FastaWriter(fasta_file)
    writer.write_header()
    for record in PdbIO.PdbSeqresIterator(pdb_file):
        seq_length = len(record.seq)
        # Same short-circuit order as before: the length is checked first,
        # and the id filter is consulted only when a filter was supplied.
        if seq_length != 0 and (transfer_ids is None or record.id in transfer_ids):
            print(record.id, record.seq, seq_length)
            writer.write_record(record)
        
# Export chain B of each structure to its own FASTA file.
# The handles are opened in `with` blocks so they are closed (and the FASTA
# output flushed to disk) as soon as each export finishes; the earlier bare
# open(...) calls left all six handles open.
for pdb_path, fasta_path, record_id in (
    ('./data/pdb1tup.ent', './data/1tup.fasta', '1TUP:B'),
    ('./data/pdb1olg.ent', './data/1olg.fasta', '1OLG:B'),
    ('./data/pdb1ycq.ent', './data/1ycq.fasta', '1YCQ:B'),
):
    with open(pdb_path) as pdb_handle, open(fasta_path, 'w') as fasta_handle:
        get_fasta(pdb_handle, fasta_handle, transfer_ids=[record_id])

1TUP:B SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNT 219
1OLG:B KKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG 42
1YCQ:B PLSQETFSDLWKLLPEN 17
