# 07. Molecules CG Mapping 

In this tutorial, we show how to generate a CG mapping matrix for a molecule given a bead distribution. The trajectory and topology files come from an all-atom (AA) simulation run in GROMACS (see the `Molecules_CG_Mapping` folder). The protein is FF (diphenylalanine) and the solvent is a mixture of water and methanol.

In [1]:
# disable GPU
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import hoomd, hoomd.htf as htf, hoomd.md
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import MDAnalysis as mda
from os import path
import pickle

In [2]:
# Input files: GROMACS run input (topology) and the matching trajectory
TPR = 'Molecules_CG_Mapping/nvt_prod.tpr'
TRAJECTORY = 'Molecules_CG_Mapping/traj.trr'

# One universe built from the topology alone, one with the trajectory attached
tpr = mda.Universe(TPR)
u = mda.Universe(TPR, TRAJECTORY)

# Mapping matrix for FF: select the PHE residues with resid 0 to 1
protein_FF = u.select_atoms("resname PHE and resid 0:1")

# One inner list of atom names per CG bead (8 beads in total).
# The first four entries list the atoms of the first residue,
# the last four those of the second residue.
Beads_distribution = [
    ['N', 'H1', 'H2', 'H3'],
    ['CA', 'HA', 'CB', 'HB1', 'HB2'],
    ['CG', 'CD1', 'HD1', 'CD2', 'HD2', 'CE1', 'HE1', 'CE2', 'HE2', 'CZ', 'HZ'],
    ['C', 'O'],
    ['N', 'H'],
    ['CA', 'HA', 'CB', 'HB1', 'HB2'],
    ['CG', 'CD1', 'HD1', 'CD2', 'HD2', 'CE1', 'HE1', 'CE2', 'HE2', 'CZ', 'HZ'],
    ['C', 'O1', 'O2'],
]

# Build the bead-by-atom mapping matrix and display it
mapping_FF = htf.matrix_mapping(protein_FF, Beads_distribution)
print(mapping_FF)

  'this file.'.format(filename))


[[0.8224383  0.05918723 0.05918723 0.05918723 0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.44409524 0.03726984
  0.44409524 0.03726984 0.03726984 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.15577257 0.15577257 0.01307

In [3]:
# Mapping matrix for water: a single SOL residue mapped onto one bead
water = u.select_atoms("resname SOL and resid 500")

# All three atoms of the molecule go into the same bead
Beads_distribution = [
    ['OW', 'HW1', 'HW2'],
]

mapping_water = htf.matrix_mapping(water, Beads_distribution)
print(mapping_water)

[[0.88809574 0.05595213 0.05595213]]


In [4]:
# Mapping matrix for methanol: a single MET residue mapped onto one bead
methanol = u.select_atoms("resname MET and resid 11665 ")

# All six atoms of the molecule go into the same bead
Beads_distribution_methanol = [
    ['C', 'H', 'H', 'H', 'OA', 'HO'],
]

mapping_methanol = htf.matrix_mapping(methanol, Beads_distribution_methanol)
print(mapping_methanol)

[[0.37484707 0.03145832 0.03145832 0.03145832 0.49931966 0.03145832]]


In [5]:
# Segment ids of the topology, de-duplicated but kept in order of first
# appearance: np.unique gives the first index of each value, and sorting
# those indices restores the original ordering.
segids = u.select_atoms('all').segids
_, idx = np.unique(segids, return_index=True)
seg_id_list = segids[np.sort(idx)].tolist()

# Same de-duplication for the residue (molecule type) names
resnames = u.atoms.resnames
_, idx = np.unique(resnames, return_index=True)
resname_list = resnames[np.sort(idx)].tolist()

# Atom names of one representative molecule of each type
atoms_in_molecule_list = [
    group.names for group in (protein_FF, water, methanol)
]

In [6]:
# molecule_list_indexed = htf.find_molecules_from_topology(u,atoms_in_molecule_list)
# print (molecule_list_indexed)

## Building the model graph

We build an LJ potential with unknown, trainable parameters (`epsilon`, `sigma`). We then obtain forces from our potential and the simulation. Force matching is used to modify the LJ potential until the forces agree. We use Keras layers to implement the trainable parameters. We'll make our starting parameters $\epsilon=1.2$ and $\sigma=0.9$ (the values passed to `LJLayer(0.9, 1.2)` below). The goal is to train them to reach $\sigma, \epsilon = 1.0$.

In [22]:
class LJLayer(tf.keras.layers.Layer):
    """Keras layer evaluating a Lennard-Jones pair energy with trainable parameters.

    The layer owns one non-negative weight vector ``w = [sigma, epsilon]``
    and evaluates ``4 * epsilon * ((sigma / r)**12 - (sigma / r)**6) / 2``.

    Args:
        sig: initial value of sigma (stored in ``w[0]``).
        eps: initial value of epsilon (stored in ``w[1]``).
    """

    def __init__(self, sig, eps):
        # Do not pass ``self`` explicitly: with ``super()`` it is already
        # bound, so an extra positional ``self`` would land in the first
        # parameter of ``Layer.__init__`` instead of being ignored.
        super().__init__(name='lj')
        # Keep the initial values around for later comparison
        self.start = [sig, eps]
        self.w = self.add_weight(
            shape=[2],
            initializer=tf.constant_initializer([sig, eps]),
            constraint=tf.keras.constraints.NonNeg())

    def call(self, r):
        """Return the LJ pair energy for the distances ``r``.

        Args:
            r: tensor of pair distances.

        Returns:
            Tensor shaped like ``r`` holding half of the pair energy, so
            that summing over a neighbor list that contains every pair
            twice does not double count.
        """
        # Index the weights in the same order they were initialised:
        # w[0] is sigma, w[1] is epsilon.
        sigma, epsilon = self.w[0], self.w[1]
        # divide_no_nan yields 0 where r**6 == 0 instead of inf/nan
        r6 = tf.math.divide_no_nan(sigma**6, r**6)
        energy = epsilon * 4.0 * (r6**2 - r6)
        # divide by 2 to remove double count
        return energy / 2.
    
class TrainableLJ(htf.SimModel):
    """SimModel whose forces come from a trainable ``LJLayer``."""

    def setup(self):
        # Starting guess for the trainable parameters
        self.lj = LJLayer(sig=0.9, eps=1.2)

    def compute(self, nlist):
        """Return ``(forces, lj weights, per-particle energy)`` for ``nlist``."""
        # Norm of the first three components of every neighbor entry
        distances = htf.safe_norm(tensor=nlist[:, :, :3], axis=2)
        # Sum the pairwise energies over axis 1 of the neighbor list
        particle_energy = tf.reduce_sum(
            input_tensor=self.lj(distances), axis=1)
        return (
            htf.compute_nlist_forces(nlist, particle_energy),
            self.lj.w,
            particle_energy,
        )
    
class TrajModel(htf.SimModel):
    """SimModel that returns fixed-parameter LJ forces and averages two RDFs.

    Every call to ``compute`` updates the running means ``avg_chrdf`` and
    ``avg_ohrdf``.
    """

    def setup(self):
        # Running averages of the two RDFs over all processed frames
        self.avg_chrdf = tf.keras.metrics.MeanTensor()
        self.avg_ohrdf = tf.keras.metrics.MeanTensor()

    def compute(self, nlist, positions):
        """Return the forces for one frame and update both RDF averages."""
        # Column 3 of positions is handed to compute_rdf as the type tensor
        particle_types = positions[:, 3]

        # Pairwise LJ energy; the factor 4.0 / 2.0 halves the usual
        # prefactor of 4 because every pair is counted twice.
        r_inv6 = htf.nlist_rinv(nlist)**6
        pair_energy = 2.0 * (r_inv6 * r_inv6 - r_inv6)
        # Sum over pairwise energy, then differentiate to get forces
        total_energy = tf.reduce_sum(pair_energy, axis=1)
        forces = htf.compute_nlist_forces(nlist, total_energy)

        # RDFs over the range [0, 15] with 20 bins.
        # For reference, type indices in this case are: {C:0, H:1, N:2, O:3}
        rdf_c_h = htf.compute_rdf(
            nlist, [0, 15], particle_types,
            nbins=20, type_i=0, type_j=1)
        rdf_o_h = htf.compute_rdf(
            nlist, [0, 15], particle_types,
            nbins=20, type_i=3, type_j=1)

        # Fold this frame into the running averages
        self.avg_chrdf.update_state(rdf_c_h)
        self.avg_ohrdf.update_state(rdf_o_h)
        return forces
    
# define model
class MappingModel(htf.SimModel):
    """SimModel that averages an RDF of mapped (CG) beads and an all-atom RDF.

    ``setup`` arguments:
        CG_NN: passed to ``htf.compute_nlist`` for the mapped neighbor list.
        cg_mapping: passed to ``htf.center_of_mass`` to map atoms to beads.
        rcut: cutoff of the mapped neighbor list and upper bound of both RDFs.
    """

    def setup(self, CG_NN, cg_mapping, rcut):
        self.cg_mapping = cg_mapping
        self.CG_NN = CG_NN
        self.rcut = rcut
        # Running averages of both RDFs over all processed frames
        self.avg_aa_rdf = tf.keras.metrics.MeanTensor()
        self.avg_cg_rdf = tf.keras.metrics.MeanTensor()

    def compute(self, nlist, positions, box):
        """Update both RDF averages for one frame; returns ``None``."""
        box_lengths = htf.box_size(box)

        # Center of mass of every CG bead from the atom coordinates
        bead_positions = htf.center_of_mass(
            positions[:, :3], self.cg_mapping, box_lengths)
        # Create the mapped neighbor list
        bead_nlist = htf.compute_nlist(
            bead_positions, self.rcut, self.CG_NN, box_lengths, True)

        # RDF of the mapped beads
        self.avg_cg_rdf.update_state(
            htf.compute_rdf(bead_nlist, [0.1, self.rcut]))

        # All-atom RDF restricted to type index 3 on both sides.
        # NOTE(review): the original comment called this the C-C RDF, but
        # the TrajModel comment maps index 3 to O - confirm which type
        # is intended.
        self.avg_aa_rdf.update_state(
            htf.compute_rdf(
                nlist, [0.1, self.rcut], positions[:, 3],
                type_i=3, type_j=3))
        return None
# Instantiate the model that is evaluated on every trajectory frame below.
# NOTE(review): 256 is presumably the neighbor-list size; it matches the
# first argument given to htf.iter_from_trajectory - confirm.
model = TrajModel(256)
    


## Run from trajectory

We load the trajectory with `MDAnalysis` and then call the `iter_from_trajectory` function. It iterates over the trajectory frames, constructing a neighbor list for each frame according to `r_cut`, so that the model can be evaluated on every frame.

In [20]:
# Evaluate the model on every frame of the PHE selection
for inputs, ts in htf.iter_from_trajectory(
        256, u, selection='resname PHE', r_cut=25):
    result = model(inputs)
    # Keep entries 1 and 2 of the inputs (printed below as positions and box)
    aa_pos, box = inputs[1], inputs[2]
    # Sketch of a CG force-matching step (not implemented yet):
    #     aa_pos = inputs[1]
    #     mapped_pos = ....
    #     mapped_forces = ...
    #     mapped_nlist = htf.compute_nlist(mapped_pos, ...)
    #     model_inputs = [mapped_pos, mapped_nlist] + inputs[2:] # get box,  other items
    #     loss = my_model.train_on_batch(x = model_inputs, y = mapped_forces)
    #     print(loss)

# Inspect what the last frame left behind
print(np.shape(aa_pos))
print(box)

(860, 4)
[[78.98509979  0.          0.        ]
 [ 0.         78.98509979  0.        ]
 [ 0.          0.         78.98509979]]


In [15]:
print (np.shape(result))

(1, 860, 4)


In [21]:
print (78.98509979/2)

39.492549895
