# WS_ch08B.ipynb
### WESmith 04/18/23

## INTRODUCING BIO.PDB
#### (see book code in Chapter08/PDB.py)

### WS created this notebook to follow along with code from the book
### 'Bioinformatics with Python Cookbook' by Tiago Antao
#### Each recipe will have its own notebook, suffixed by A, B, etc.


In [1]:
from Bio import PDB

In [2]:
# Client object whose retrieve_pdb_file() is used in the next cell to download entries.
repository = PDB.PDBList()

In [3]:
# Download the three entries used in this notebook into the current
# directory, requesting the 'pdb' file format for each.
for pdb_id in ('1TUP', '1OLG', '1YCQ'):
    last_path = repository.retrieve_pdb_file(pdb_id, pdir='.', file_format='pdb')
last_path  # cell result: the value returned by the final call

Downloading PDB structure '1tup'...
Downloading PDB structure '1olg'...
Downloading PDB structure '1ycq'...


'./pdb1ycq.ent'

In [5]:
# Single parser instance, reused below to read each downloaded .ent file.
parser = PDB.PDBParser()

In [6]:
# Parse each downloaded file into a structure object. The first argument is a
# free-form label; it later shows up as the first element of an atom's full_id.
p53_1tup, p53_1olg, p53_1ycq = (
    parser.get_structure(label, filename)
    for label, filename in (
        ('P 53 - DNA Binding', 'pdb1tup.ent'),
        ('P 53 - Tetramerization', 'pdb1olg.ent'),
        ('P 53 - Transactivation', 'pdb1ycq.ent'),
    )
)



In [7]:
def print_pdb_headers(headers, indent=0):
    """Pretty-print a (possibly nested) header mapping to stdout.

    Keys are right-aligned in a 20-character column. Each value is rendered
    according to its kind:

    * ``dict`` (or subclass): a blank line, the key on its own line, the
      nested mapping printed recursively with 4 extra spaces of indent,
      then another blank line;
    * ``list`` (or subclass): the key on its own line, then one line per
      element prefixed with ``->``;
    * anything else: ``key: value`` on a single line.

    Args:
        headers: Mapping of header names to values.
        indent: Number of spaces prepended to every line at this level.

    Returns:
        None.
    """
    ind_text = ' ' * indent
    for header, content in headers.items():
        # isinstance() rather than an exact type() comparison, so that
        # subclasses (OrderedDict, defaultdict, list subclasses, ...) are
        # expanded instead of being dumped as a single repr.
        if isinstance(content, dict):
            print('\n%s%20s:' % (ind_text, header))
            print_pdb_headers(content, indent + 4)
            print()
        elif isinstance(content, list):
            print('%s%20s:' % (ind_text, header))
            for elem in content:
                # '->' is right-aligned one column past the key column.
                print('%s%21s %s' % (ind_text, '->', elem))
        else:
            print('%s%20s: %s' % (ind_text, header, content))

In [9]:
print(p53_1tup.header.keys()) # WS: list the top-level header field names of the parsed 1TUP structure

dict_keys(['name', 'head', 'idcode', 'deposition_date', 'release_date', 'structure_method', 'resolution', 'structure_reference', 'journal_reference', 'author', 'compound', 'source', 'has_missing_residues', 'missing_residues', 'keywords', 'journal'])


In [8]:
# Pretty-print the full header of the 1TUP structure (nested fields are indented).
print_pdb_headers(p53_1tup.header)

                name: tumor suppressor p53 complexed with dna
                head: antitumor protein/dna
              idcode: 1TUP
     deposition_date: 1995-07-11
        release_date: 1995-07-11
    structure_method: x-ray diffraction
          resolution: 2.2
 structure_reference:
                   -> n.p.pavletich,k.a.chambers,c.o.pabo the dna-binding domain of p53 contains the four conserved regions and the major mutation hot spots genes dev. v. 7 2556 1993 issn 0890-9369 
                   -> b.vogelstein,k.w.kinzler p53 function and dysfunction cell(cambridge,mass.) v. 70 523 1992 issn 0092-8674 
   journal_reference: y.cho,s.gorina,p.d.jeffrey,n.p.pavletich crystal structure of a p53 tumor suppressor-dna complex: understanding tumorigenic mutations. science v. 265 346 1994 issn 0036-8075 8023157 
              author: Y.Cho,S.Gorina,P.D.Jeffrey,N.P.Pavletich

            compound:

                       1:
                        misc: 
                    molecule: dna (5

In [10]:
# Raw 'compound' header dictionaries of the three structures, one per line.
for structure in (p53_1tup, p53_1olg, p53_1ycq):
    print(structure.header['compound'])

{'1': {'misc': '', 'molecule': "dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') ", 'chain': 'e', 'engineered': 'yes'}, '2': {'misc': '', 'molecule': "dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') ", 'chain': 'f', 'engineered': 'yes'}, '3': {'misc': '', 'molecule': 'protein (p53 tumor suppressor )', 'chain': 'a, b, c', 'engineered': 'yes'}}
{'1': {'misc': '', 'molecule': 'tumor suppressor p53 (oligomerization domain)', 'chain': 'a, b, c, d', 'engineered': 'yes'}}
{'1': {'misc': '', 'molecule': 'mdm2', 'chain': 'a', 'synonym': 'mdm2', 'engineered': 'yes'}, '2': {'misc': '', 'molecule': 'p53', 'chain': 'b', 'fragment': 'residues 13 - 29', 'engineered': 'yes'}}


In [12]:
print_pdb_headers(p53_1tup.header['compound']) # WS: pretty-print only the 'compound' section of 1TUP for a cleaner view than the raw dict


                   1:
                    misc: 
                molecule: dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') 
                   chain: e
              engineered: yes


                   2:
                    misc: 
                molecule: dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') 
                   chain: f
              engineered: yes


                   3:
                    misc: 
                molecule: protein (p53 tumor suppressor )
                   chain: a, b, c
              engineered: yes



In [13]:
print_pdb_headers(p53_1olg.header['compound']) # WS: pretty-print only the 'compound' section of 1OLG for a cleaner view than the raw dict


                   1:
                    misc: 
                molecule: tumor suppressor p53 (oligomerization domain)
                   chain: a, b, c, d
              engineered: yes



In [14]:
print_pdb_headers(p53_1ycq.header['compound']) # WS: pretty-print only the 'compound' section of 1YCQ for a cleaner view than the raw dict


                   1:
                    misc: 
                molecule: mdm2
                   chain: a
                 synonym: mdm2
              engineered: yes


                   2:
                    misc: 
                molecule: p53
                   chain: b
                fragment: residues 13 - 29
              engineered: yes



In [15]:
def describe_model(name, pdb):
    """Print one summary line per chain of every model in ``pdb``.

    A blank line is printed first. Each following line reports the chain's
    ``id``, ``len(chain)`` (labelled "Number of residues") and the number of
    items produced by ``chain.get_atoms()`` (labelled "Number of atoms").

    Args:
        name: Label placed at the start of every line.
        pdb: Iterable of models, each an iterable of chain objects.

    Returns:
        None. Output goes to stdout.
    """
    line_template = '%s - Chain: %s. Number of residues: %d. Number of atoms: %d.'
    print()
    # Flatten model -> chain into a single lazy stream, preserving order.
    every_chain = (chain for model in pdb for chain in model)
    for chain in every_chain:
        residue_count = len(chain)
        atom_count = len(list(chain.get_atoms()))
        print(line_template % (name, chain.id, residue_count, atom_count))

In [16]:
# Summarise the chains of each of the three parsed structures in turn.
for label, structure in (('1TUP', p53_1tup), ('1OLG', p53_1olg), ('1YCQ', p53_1ycq)):
    describe_model(label, structure)


1TUP - Chain: E. Number of residues: 43. Number of atoms: 442.
1TUP - Chain: F. Number of residues: 35. Number of atoms: 449.
1TUP - Chain: A. Number of residues: 395. Number of atoms: 1734.
1TUP - Chain: B. Number of residues: 265. Number of atoms: 1593.
1TUP - Chain: C. Number of residues: 276. Number of atoms: 1610.

1OLG - Chain: A. Number of residues: 42. Number of atoms: 698.
1OLG - Chain: B. Number of residues: 42. Number of atoms: 698.
1OLG - Chain: C. Number of residues: 42. Number of atoms: 698.
1OLG - Chain: D. Number of residues: 42. Number of atoms: 698.

1YCQ - Chain: A. Number of residues: 123. Number of atoms: 741.
1YCQ - Chain: B. Number of residues: 16. Number of atoms: 100.


In [38]:
# WS to explore chain object
# WS: gather every chain of every model (in iteration order), then
# materialise the atoms of the first chain for interactive inspection.
dd = [chain for model in p53_1tup for chain in model]
ee = list(dd[0].get_atoms())

In [40]:
# Print the ids of residues whose first id field is neither ' ' nor 'W';
# for 1TUP this leaves only the 'H_ZN' entries.
for residue in p53_1tup.get_residues():
    first_field = residue.id[0]
    if first_field != ' ' and first_field != 'W':
        print(residue.id)

('H_ZN', 951, ' ')
('H_ZN', 952, ' ')
('H_ZN', 953, ' ')


In [41]:
# get_residues() returns a generator, so wrap it in list() to index it or iterate it more than once.
type(p53_1tup.get_residues())

generator

In [45]:
# WS to explore residue object
# WS: materialise the residue generator so it can be reused, then show
# the id tuple of every residue in iteration order.
gg = [*p53_1tup.get_residues()]
for k in gg:
    print(k.id)

(' ', 1001, ' ')
(' ', 1002, ' ')
(' ', 1003, ' ')
(' ', 1004, ' ')
(' ', 1005, ' ')
(' ', 1006, ' ')
(' ', 1007, ' ')
(' ', 1008, ' ')
(' ', 1009, ' ')
(' ', 1010, ' ')
(' ', 1011, ' ')
(' ', 1012, ' ')
(' ', 1013, ' ')
(' ', 1014, ' ')
(' ', 1015, ' ')
(' ', 1016, ' ')
(' ', 1017, ' ')
(' ', 1018, ' ')
(' ', 1019, ' ')
(' ', 1020, ' ')
(' ', 1021, ' ')
('W', 2024, ' ')
('W', 2028, ' ')
('W', 2035, ' ')
('W', 2046, ' ')
('W', 2048, ' ')
('W', 2049, ' ')
('W', 2084, ' ')
('W', 2116, ' ')
('W', 2131, ' ')
('W', 2137, ' ')
('W', 2201, ' ')
('W', 2204, ' ')
('W', 2234, ' ')
('W', 2259, ' ')
('W', 2265, ' ')
('W', 2285, ' ')
('W', 2300, ' ')
('W', 2304, ' ')
('W', 2370, ' ')
('W', 2375, ' ')
('W', 2382, ' ')
('W', 2383, ' ')
(' ', 1101, ' ')
(' ', 1102, ' ')
(' ', 1103, ' ')
(' ', 1104, ' ')
(' ', 1105, ' ')
(' ', 1106, ' ')
(' ', 1107, ' ')
(' ', 1108, ' ')
(' ', 1109, ' ')
(' ', 1110, ' ')
(' ', 1111, ' ')
(' ', 1112, ' ')
(' ', 1113, ' ')
(' ', 1114, ' ')
(' ', 1115, ' ')
(' ', 1116, ' 

In [57]:
# Take the first residue yielded for chain 'A' of model 0.
res = next(p53_1tup[0]['A'].get_residues())
print(res)

<Residue SER het=  resseq=94 icode= >


In [54]:
# Iterating a residue yields its atoms; show each with its serial number and element.
for atom in res:
    print(atom, atom.serial_number, atom.element)

<Atom N> 858 N
<Atom CA> 859 C
<Atom C> 860 C
<Atom O> 861 O
<Atom CB> 862 C
<Atom OG> 863 O


In [55]:
print(p53_1tup[0]['A'][94]['CA'])  # WS: index path is model -> chain -> residue -> atom name

<Atom CA>


In [63]:
# Keep a handle on the 'CA' atom of residue 94 in chain 'A' of model 0 for inspection below.
ff = p53_1tup[0]['A'][94]['CA']

In [74]:
ff.full_id, ff.coord  # WS: full hierarchical id (structure label, model, chain, residue id, atom id) and the coordinate array

(('P 53 - DNA Binding', 0, 'A', (' ', 94, ' '), ('CA', ' ')),
 array([75.562, 21.797, 80.653], dtype=float32))

In [75]:
# fasta file next