In [1]:
import sys
sys.path.append('/home/diego/Myusr/src/UIBCDFGitHub/LabTools')

import MolLabTools as MolLab
import MDLabTools  as MDLab

from copy import deepcopy
import numpy as np
from pdbfixer import PDBFixer
from simtk import openmm, unit
from simtk.openmm import app
import mdtraj
import nglview



# Getting the test system Barnase-Barstar

## Introduction

The complex Barnase Barstar is one of the main workhorses when protein-protein interactions methods needs to be tested. Thereby, the system has being widely studied. Maybe one the last hits in the computational literature regarding this system is the paper REF where the binding kinetics is unveiled using an adaptive sampling approach comined with TRAM.

The Barnase-Barstar used here as a bench test system will be the same used in REF: the monomer Barstar A coming from the pdb 1BRS and the monomer Barnase F fitted by Barstar B from the same pdb.

## Extracting molecules from 1brs.pdb

Getting the 1BRS pdb (downloaded to be stored in pdbs/)

In [2]:
pdbs_dir="pdbs/"

In [3]:
MolLab.Tools.get_pdb_file('1brs',destination=pdbs_dir)

The pdb is loaded

In [4]:
fix_1brs=PDBFixer(pdbs_dir+"1brs.pdb")

Lets check de number of chains. Crystal waters attached to each protein has the same chain id and they have to be removed. (this can be probably done easier)

In [5]:
for chain in fix_1brs.topology.chains():
    print("Chain index {} with pdb id {}".format(chain.id,chain.index))

Chain index A with pdb id 0
Chain index B with pdb id 1
Chain index C with pdb id 2
Chain index D with pdb id 3
Chain index E with pdb id 4
Chain index F with pdb id 5
Chain index A with pdb id 6
Chain index B with pdb id 7
Chain index C with pdb id 8
Chain index D with pdb id 9
Chain index E with pdb id 10
Chain index F with pdb id 11


In [6]:
fix_1brs.removeChains([6,7,8,9,10,11])

The proteins in the pdb need to be fixed. They have missed atoms and residues.

In [7]:
chains = list(fix_1brs.topology.chains())
num_chains = len(chains)

In [8]:
fix_1brs.findMissingResidues()
fix_1brs.findNonstandardResidues()
fix_1brs.findMissingAtoms()

In [9]:
#print(fix_1brs.missingResidues)
#print(fix_1brs.nonstandardResidues)
#print(fix_1brs.missingAtoms)

In [10]:
fix_1brs.addMissingAtoms()

Each chain corresponding to Barnase (A, B or C) and Barstar (D, E, or F) is written in a separate pdb in the same pdbs directory

In [11]:
aux_dict={}
aux_dict["Barnase_A"]=0
aux_dict["Barnase_B"]=1
aux_dict["Barnase_C"]=2
aux_dict["Barstar_D"]=3
aux_dict["Barstar_E"]=4
aux_dict["Barstar_F"]=5

In [12]:
for molecule_name in aux_dict.keys():
    chain_in=aux_dict[molecule_name]
    chains_out=list(range(num_chains))
    chains_out.remove(chain_in)
    globals()[molecule_name]= deepcopy(fix_1brs)
    aux_1brs=globals()[molecule_name]
    aux_1brs.removeChains(chains_out)
    pdbfilename = '1brs_'+molecule_name+'_fixed.pdb'
    outfile = open(pdbs_dir+pdbfilename, 'w')
    app.PDBFile.writeFile(aux_1brs.topology, aux_1brs.positions, outfile)
    outfile.close()
    del(aux_1brs,chain_in,chains_out,outfile)

### Viewing structures coming from 1brs.pdb

In case the previous section was already run we can just load the pdbs corresponding to Barnase (A, B and C) and Barstar(D, E and F).

In [13]:
# With this cell the molecular systems already written in pdbs can be loaded (there is no need to execute previous block).
# Nothing will happen in case the notebook was run from the beginning.

pdbs_dir="pdbs/"
molecule_names=["Barnase_A","Barnase_B","Barnase_C","Barstar_D","Barstar_E","Barstar_F"]

for molecule_name in molecule_names:
    if molecule_name not in globals():
        pdbfilename = '1brs_'+molecule_name+'_fixed.pdb'
        globals()[molecule_name]=app.PDBFile(pdbs_dir+pdbfilename)

Lets define an auxilary function to visualize the proteins:

In [14]:
def make_view(mol_system=None,positions=None):
    topology=mol_system.getTopology()
    if not positions:
        positions=mol_system.getPositions()
    mdtraj_aux_topology = mdtraj.Topology.from_openmm(topology)
    traj_aux = mdtraj.Trajectory(positions/unit.nanometers, mdtraj_aux_topology)
    view = nglview.show_mdtraj(traj_aux)
    view.center()
    return view

The 1BRS pdb has 3 Barnase-Barstar molecular complexes which can be visualized separately:

In [15]:
receptor = Barnase_A
ligand   = Barstar_D
heterodimer_AD=app.Modeller(receptor.topology, receptor.positions)
heterodimer_AD.add(ligand.topology, ligand.positions)

view_AD=make_view(heterodimer_AD)
view_AD.clear()
view_AD.add_cartoon(selection=":A", color='green')
view_AD.add_surface(selection=":A", opacity=0.1)
view_AD.add_cartoon(selection=":B", color='orange')
view_AD.add_surface(selection=":B", opacity=0.1)

view_AD

NGLWidget()

In [16]:
receptor = Barnase_B
ligand   = Barstar_E
heterodimer_BE=app.Modeller(receptor.topology, receptor.positions)
heterodimer_BE.add(ligand.topology, ligand.positions)

view_BE=make_view(heterodimer_BE)
view_BE.clear()
view_BE.add_cartoon(selection=":A", color='purple')
view_BE.add_surface(selection=":A", opacity=0.1)
view_BE.add_cartoon(selection=":B", color='blue')
view_BE.add_surface(selection=":B", opacity=0.1)

view_BE

NGLWidget()

In [17]:
receptor = Barnase_C
ligand   = Barstar_F
heterodimer_CF=app.Modeller(receptor.topology, receptor.positions)
heterodimer_CF.add(ligand.topology, ligand.positions)

view_CF=make_view(heterodimer_CF)
view_CF.clear()
view_CF.add_cartoon(selection=":A", color='red')
view_CF.add_surface(selection=":A", opacity=0.1)
view_CF.add_cartoon(selection=":B", color='yellow')
view_CF.add_surface(selection=":B", opacity=0.1)

view_CF

NGLWidget()

Or together as they were solved in the unit cell:

In [18]:
tri_heterodimer_pdb=app.Modeller(Barnase_A.topology,Barnase_A.positions)
tri_heterodimer_pdb.add(Barnase_B.topology, Barnase_B.positions)
tri_heterodimer_pdb.add(Barnase_C.topology, Barnase_C.positions)
tri_heterodimer_pdb.add(Barstar_D.topology, Barstar_D.positions)
tri_heterodimer_pdb.add(Barstar_E.topology, Barstar_E.positions)
tri_heterodimer_pdb.add(Barstar_F.topology, Barstar_F.positions)


view_tri=make_view(tri_heterodimer_pdb)
view_tri.clear()
view_tri.add_cartoon(selection=":A", color='green')
view_tri.add_surface(selection=":A", opacity=0.1)
view_tri.add_cartoon(selection=":B", color='purple')
view_tri.add_surface(selection=":B", opacity=0.1)
view_tri.add_cartoon(selection=":C", color='red')
view_tri.add_surface(selection=":C", opacity=0.1)
view_tri.add_cartoon(selection=":D", color='orange')
view_tri.add_surface(selection=":D", opacity=0.1)
view_tri.add_cartoon(selection=":E", color='blue')
view_tri.add_surface(selection=":E", opacity=0.1)
view_tri.add_cartoon(selection=":F", color='yellow')
view_tri.add_surface(selection=":F", opacity=0.1)

view_tri

NGLWidget()

### Selecting Barnase and Barstar chains to work with

In case the previous section was already run we can just load the pdbs corresponding to Barnase (A, B and C) and Barstar(D, E and F).

In [19]:
# With this cell the molecular systems already written in pdbs can be loaded (there is no need to execute previous block).
# Nothing will happen in case the notebook was run from the beginning.

pdbs_dir="pdbs/"
molecule_names=["Barnase_A","Barnase_B","Barnase_C","Barstar_D","Barstar_E","Barstar_F"]

for molecule_name in molecule_names:
    if molecule_name not in globals():
        pdbfilename = '1brs_'+molecule_name+'_fixed.pdb'
        globals()[molecule_name]=app.PDBFile(pdbs_dir+pdbfilename)

Lets fit all barnases over Barnase_A. And the same with the barstars taking the monomer B as reference.

In [20]:
MDLab.Tools.OpenMM.LeastRMSDFit(system_ref=Barnase_A, system=Barnase_B, selection='name CA',pbc=False)
MDLab.Tools.OpenMM.LeastRMSDFit(system_ref=Barnase_A, system=Barnase_C, selection='name CA',pbc=False)

MDLab.Tools.OpenMM.LeastRMSDFit(system_ref=Barstar_D, system=Barstar_E, selection='name CA',pbc=False)
MDLab.Tools.OpenMM.LeastRMSDFit(system_ref=Barstar_D, system=Barstar_F, selection='name CA',pbc=False)

In [21]:
receptors = app.Modeller(Barnase_A.topology, Barnase_A.positions)
receptors.add(Barnase_B.topology, Barnase_B.positions)
receptors.add(Barnase_C.topology, Barnase_C.positions)

view_receptors=make_view(receptors)
view_receptors.clear()
view_receptors.add_cartoon(selection=":A", color='green')
view_receptors.add_cartoon(selection=":B", color='purple')
view_receptors.add_cartoon(selection=":C", color='red')
view_receptors

NGLWidget()

In [22]:
ligands = app.Modeller(Barstar_D.topology, Barstar_D.positions)
ligands.add(Barstar_E.topology, Barstar_E.positions)
ligands.add(Barstar_F.topology, Barstar_F.positions)

view_receptors=make_view(receptors)
view_receptors.clear()
view_receptors.add_cartoon(selection=":A", color='green')
view_receptors.add_cartoon(selection=":B", color='purple')
view_receptors.add_cartoon(selection=":C", color='red')
view_receptors

NGLWidget()

Finally, the chains A and F are written as the pdbs `pdbs/Barnase.pdb` and `pdbs/Barstar.pdb` for future use in pynterpred tests.

In [23]:
Barnase = Barnase_A
Barstar = Barstar_F

In [24]:
pdbfilename = 'Barnase.pdb'
outfile = open(pdbs_dir+pdbfilename, 'w')
app.PDBFile.writeFile(Barnase.topology, Barnase.positions, outfile)

pdbfilename = 'Barstar.pdb'
outfile = open(pdbs_dir+pdbfilename, 'w')
app.PDBFile.writeFile(Barstar.topology, Barstar.positions, outfile)

Before finishing, lets visualize the complex selected.

In [25]:
Barnase_Barstar = app.Modeller(Barnase.topology, Barnase.positions)
Barnase_Barstar.add(Barstar.topology, Barstar.positions)


view=make_view(Barnase_Barstar)
view.clear()
view.add_cartoon(selection=":A", color='green')
view.add_surface(selection=":A", opacity=0.1)
view.add_cartoon(selection=":B", color='yellow')
view.add_surface(selection=":B", opacity=0.1)
view

NGLWidget()