# WS_ch08E.ipynb
## WESmith 04/21/23
# PERFORMING GEOMETRIC OPERATIONS
### (see book code in Chapter08/Mass.py)
### WS created this notebook to follow along with code from the book
### 'Bioinformatics with Python Cookbook' by Tiago Antao
### Each recipe will have its own notebook, suffixed by A, B, etc.

In [None]:
import numpy as np
import pandas as pd
from Bio import PDB
import os

In [None]:
# Directory the PDB entry is downloaded into and later read back from
data_dir  = 'data'

In [None]:
# repository is used below to download the entry; parser turns the saved file into a structure
repository = PDB.PDBList()
parser     = PDB.PDBParser()

In [None]:
# Fetch entry 1TUP in 'pdb' file format into data_dir
repository.retrieve_pdb_file('1TUP', pdir=data_dir, file_format='pdb')

In [None]:
# Path of the downloaded entry; assumes the download above was saved as 'pdb1tup.ent' — TODO confirm
pdb1tup_file = os.path.join(data_dir, 'pdb1tup.ent')
# Parse the file into a structure; 'P 53' is the id given to that structure
p53_1tup     = parser.get_structure('P 53', pdb1tup_file)

In [None]:
# Distinct values of the first field of every residue id -- the same field
# that the mass and center filters further down compare against 'W' and ' '
my_residues = {res.id[0] for res in p53_1tup.get_residues()}
print(my_residues)

In [None]:
def get_mass(atoms, accept_fun=lambda atom: atom.parent.id[0] != 'W'):
    """Return the summed ``mass`` of the atoms that ``accept_fun`` lets through.

    atoms      -- iterable of objects exposing a ``mass`` attribute
    accept_fun -- predicate called with each atom; an atom is counted only
                  when it returns a truthy value. The default rejects atoms
                  whose ``parent.id[0]`` equals ``'W'``.

    Returns the total, which is ``0`` when no atom is accepted.
    """
    accepted = filter(accept_fun, atoms)
    return sum(atom.mass for atom in accepted)

In [None]:
# Ids of the chains, in the order the structure yields them; used below as the
# row index of the per-chain tables
chain_names = [chain.id for chain in p53_1tup.get_chains()]
chain_names

In [None]:
# One row per chain, three columns for the mass totals filled in by the next cell.
# np.zeros is used instead of the low-level np.ndarray constructor, which hands
# back uninitialized memory: the array displayed here is now well-defined.
my_mass = np.zeros((len(chain_names), 3))
my_mass

In [None]:
# Each row of my_mass gets three totals for one chain, in column order:
#   every atom outside water / atoms whose residue flag is neither ' ' nor 'W' /
#   water atoms only
for row, chain in enumerate(p53_1tup.get_chains()):
    my_mass[row, :] = (
        get_mass(chain.get_atoms()),
        get_mass(chain.get_atoms(),
                 accept_fun=lambda atom: atom.parent.id[0] not in (' ', 'W')),
        get_mass(chain.get_atoms(),
                 accept_fun=lambda atom: atom.parent.id[0] == 'W'),
    )
# Label the rows with the chain ids and show the table
masses = pd.DataFrame(
    my_mass,
    index=chain_names,
    columns=['No Water', 'Zincs', 'Water'],
)
masses

In [None]:
def get_center(atoms, weight_fun=lambda atom: 1 if atom.parent.id[0] != 'W' else 0):
    """Return the weighted mean position of ``atoms`` as an ``(x, y, z)`` tuple.

    atoms      -- iterable of objects exposing a three-element ``coord``
    weight_fun -- called with each atom, returns its numeric weight. The
                  default gives weight 1 to every atom whose
                  ``parent.id[0]`` is not ``'W'`` and 0 otherwise, which
                  yields the geometric center of the non-water atoms.

    Returns a tuple of three built-in floats.

    Raises ZeroDivisionError when the weights add up to zero (no atoms at
    all, or every atom weighted 0), since the center is undefined then.
    """
    xsum = ysum = zsum = 0.0
    acum = 0.0
    for atom in atoms:
        # Convert to built-in floats: coordinates may be NumPy scalars of a
        # narrower type, and mixing them into the running sums would make the
        # precision and the return type depend on NumPy's promotion rules.
        x, y, z = (float(c) for c in atom.coord)
        weight = float(weight_fun(atom))
        acum += weight
        xsum += weight * x
        ysum += weight * y
        zsum += weight * z
    if acum == 0.0:
        # Fail the same way for every zero-weight input instead of returning
        # nan in some cases and raising in others.
        raise ZeroDivisionError('total weight is zero; center is undefined')
    return xsum / acum, ysum / acum, zsum / acum

In [None]:
# WS geometric center: the default weight_fun counts every non-water atom equally
print(get_center(p53_1tup.get_atoms()))

In [None]:
# WS center of mass: the lambda passed as weight_fun weights each non-water atom by its mass
# WS note: hydrogens are not included in x-ray data, so centers and masses are a little off
print(get_center(p53_1tup.get_atoms(),
                 weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0))

In [None]:
# One row per chain: columns 0-2 receive the unweighted (geometric) center and
# columns 3-5 the mass-weighted center; water atoms get weight 0 in both.
# np.zeros replaces the low-level np.ndarray constructor so the array never
# holds uninitialized memory.
my_center = np.zeros((len(chain_names), 6))
for i, chain in enumerate(p53_1tup.get_chains()):
    # get_center returns an (x, y, z) tuple, which fills three columns at once
    my_center[i, :3] = get_center(chain.get_atoms())
    my_center[i, 3:] = get_center(
        chain.get_atoms(),
        weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0)
# NOTE: despite its name, `weights` holds the per-chain centers computed above
weights = pd.DataFrame(my_center, index=chain_names,
                       columns=['X', 'Y', 'Z', 'X (Mass)', 'Y (Mass)', 'Z (Mass)'])
weights