In [1]:
import mdtraj as md
import nglview as nv
import numpy as np
import pandas as pd

import os
import requests



### Download the PDB file for the estrogen receptor (PDB ID 1QKU)

In [2]:
def fetch_pdb(pdb_id, download_path="./"):
    """Download a PDB file from RCSB and save it as ``<pdb_id>.pdb``.

    Parameters
    ----------
    pdb_id : str
        Four-character PDB identifier, e.g. ``"1QKU"``.
    download_path : str, optional
        Directory the file is written to. Defaults to the current directory.

    Returns
    -------
    None
        On success the file is written to ``download_path``. If the download
        fails (network error, timeout, or an HTTP error status such as 404) a
        message is printed and nothing is written.

    Raises
    ------
    ValueError
        If ``pdb_id`` is not exactly four characters long.
    """
    if len(pdb_id) != 4:
        raise ValueError("Invalid pdb id")
    url = 'https://files.rcsb.org/download/{}.pdb'.format(pdb_id)
    try:
        # A timeout keeps the notebook from hanging on a stalled connection.
        res = requests.get(url, allow_redirects=True, timeout=30)
        # Without this check a 404/500 error page would be saved as a ".pdb".
        res.raise_for_status()
    except requests.RequestException:
        # Only request failures are handled; anything else (e.g. a
        # KeyboardInterrupt) propagates instead of being silently swallowed.
        print("Could not fetch pdb from {}".format(url))
        return

    file_path = os.path.join(download_path, pdb_id + ".pdb")
    with open(file_path, "wb") as f:
        f.write(res.content)

In [3]:
# Download 1QKU.pdb into the current directory (the default download_path).
fetch_pdb("1QKU")

### Visualizing the estrogen receptor

In [4]:
# Render the downloaded PDB file with nglview. Ending the cell on the bare
# `view` expression makes the widget the cell's output.
view = nv.show_structure_file("./1QKU.pdb")
view

NGLWidget()

## Inspecting the topology of the estrogen receptor

In [5]:
# Read the downloaded structure with mdtraj and keep a handle on its topology,
# then print the summary line of each object.
traj = md.load("1QKU.pdb")
topology = traj.topology
print(traj, topology, sep="\n")

<mdtraj.Trajectory with 1 frames, 6596 atoms, 1343 residues, and unitcells>
<mdtraj.Topology with 9 chains, 1343 residues, 6596 atoms, 6123 bonds>


In [6]:
# Keep only the atoms matched by the "protein" selection; everything else in
# the file is dropped from the sliced trajectory and its topology.
protein_inxs = topology.select("protein")
protein_traj = traj.atom_slice(protein_inxs)
protein_topo = protein_traj.topology
print(protein_traj, protein_topo, sep="\n")

<mdtraj.Trajectory with 1 frames, 5940 atoms, 744 residues, and unitcells>
<mdtraj.Topology with 3 chains, 744 residues, 5940 atoms, 6054 bonds>


In [8]:
# Slice the protein-only trajectory into one trajectory per chain
# (chainid 0, 1 and 2), then print a summary of each.
protein_1, protein_2, protein_3 = (
    protein_traj.atom_slice(protein_topo.select(f"chainid == {chain_index}"))
    for chain_index in range(3)
)
print(protein_1, protein_2, protein_3, sep="\n")

<mdtraj.Trajectory with 1 frames, 1990 atoms, 250 residues, and unitcells>
<mdtraj.Trajectory with 1 frames, 1975 atoms, 247 residues, and unitcells>
<mdtraj.Trajectory with 1 frames, 1975 atoms, 247 residues, and unitcells>


In [42]:
# Save the first chain (protein_1) to disk in PDB format as the receptor file.
protein_1.save_pdb("./receptor_1.pdb")

In [43]:
# Show only the first chain (the structure saved as the receptor).
view = nv.show_mdtraj(protein_1)
view

NGLWidget()

In [25]:
# Show the remaining two chains together (selection "chainid 1 to 2").
view = nv.show_mdtraj(protein_traj.atom_slice(protein_topo.select("chainid 1 to 2")))
view

NGLWidget()

## Compare the sequence of each protein

In [10]:
# Residue names of each chain, in topology order.
prot_1_sequence = [res.name for res in protein_1.topology.residues]
prot_2_sequence = [res.name for res in protein_2.topology.residues]
prot_3_sequence = [res.name for res in protein_3.topology.residues]

# A DataFrame built from a dict of lists needs columns of equal length, so
# every shorter sequence is padded with empty strings up to the longest one.
# The pad length is computed instead of hard-coded, so the cell keeps working
# if the chains (or the structure) change.
longest_sequence = max(
    len(prot_1_sequence), len(prot_2_sequence), len(prot_3_sequence)
)
for chain_sequence in (prot_1_sequence, prot_2_sequence, prot_3_sequence):
    chain_sequence.extend([""] * (longest_sequence - len(chain_sequence)))

# NOTE(review): padding is appended at the END of the shorter chains, so each
# row pairs residues by list position only, not by residue number.
sequences = pd.DataFrame.from_dict(
    {
        "Protein_1": prot_1_sequence,
        "Protein_2": prot_2_sequence,
        "Protein_3": prot_3_sequence,
    }
)

In [11]:
# Preview the first 10 rows of the per-chain residue names.
sequences.head(10)

Unnamed: 0,Protein_1,Protein_2,Protein_3
0,SER,ASN,ASN
1,LYS,SER,SER
2,LYS,LEU,LEU
3,ASN,ALA,ALA
4,SER,LEU,LEU
5,LEU,SER,SER
6,ALA,LEU,LEU
7,LEU,THR,THR
8,SER,ALA,ALA
9,LEU,ASP,ASP


In [12]:
from itertools import combinations

# For every pair of chains, add a boolean column that is True where both
# chains have the same residue name in the same row. Comparing two columns
# already yields booleans, so no np.where(..., True, False) wrapper is needed.
#
# NOTE(review): the comparison is purely row-by-row. In the 10-row preview of
# `sequences`, Protein_1 rows 3-9 read the same as Protein_2 rows 0-6, which
# suggests Protein_1 has three extra leading residues and the chains are
# offset rather than different. Consider aligning on residue number before
# comparing -- TODO confirm.
protein_num = [1, 2, 3]
for i, j in combinations(protein_num, 2):
    sequences[f"Prot_{i} == Prot_{j}"] = (
        sequences[f"Protein_{i}"] == sequences[f"Protein_{j}"]
    )

sequences.head()

Unnamed: 0,Protein_1,Protein_2,Protein_3,Prot_1 == Prot_2,Prot_1 == Prot_3,Prot_2 == Prot_3
0,SER,ASN,ASN,False,False,True
1,LYS,SER,SER,False,False,True
2,LYS,LEU,LEU,False,False,True
3,ASN,ALA,ALA,False,False,True
4,SER,LEU,LEU,False,False,True


In [40]:
# Reduce each chain to the atoms matched by the "backbone" selection and print
# a summary of the three resulting trajectories.
prot_1_backbone, prot_2_backbone, prot_3_backbone = (
    chain.atom_slice(chain.topology.select("backbone"))
    for chain in (protein_1, protein_2, protein_3)
)
print(prot_1_backbone, prot_2_backbone, prot_3_backbone, sep="\n")

<mdtraj.Trajectory with 1 frames, 1000 atoms, 250 residues, and unitcells>
<mdtraj.Trajectory with 1 frames, 988 atoms, 247 residues, and unitcells>
<mdtraj.Trajectory with 1 frames, 988 atoms, 247 residues, and unitcells>


In [14]:
# help(md.core.topology.Residue)

In [41]:
# Backbone RMSD between chain 2 and chain 3. Only these two are compared here;
# their backbone slices have the same atom count (988 each in the printed
# summaries), whereas chain 1 has 1000.
# NOTE(review): the third positional argument (0) is presumably the reference
# frame index, and mdtraj presumably reports the value in nanometers -- confirm
# against the md.rmsd documentation.
rmsd = md.rmsd(prot_2_backbone, prot_3_backbone, 0)
rmsd

array([0.04460048], dtype=float32)