# Pre-fixing the trajectories

In [5]:
import itertools
import os
import glob

In [None]:
import gromacs
from gromacs.tools import Trjconv

- `traj_notes` is a list of names, one per system, e.g. 4HFI_pH46_ethanol
- `default_load_location` is the root directory of the systems
- `default_save_location` is where to save these "fixed" trajectories.
    - The idea is to keep the "fixed" trajectories in a separate location from the raw ones.
- `default_skip` is how many frames to skip from the raw traj
- `default_rep` is the number of replicates for a system.

- TRAJ_NOTE format:

A_B_C_..._Z

- A: structure name or PDB ID.
- B: mutation
- C: main ligand
- Z: interesting variables

e.g. 
- GABA_WT_PBT_POPC
- 4HFI_F238L_ethanol_pH46

In [2]:
# Names of the systems to process, one entry per system (see the TRAJ_NOTE format above).
traj_notes = [
    'URRO_pH3_dens_pH46_amber_largerbox',
    'URRO_pH3_dens_pH70_amber_largerbox',
    'URRO_pH5_dens_pH46_amber_largerbox',
    'URRO_pH5_dens_pH70_amber_largerbox',
    'URRO_pH7_dens_pH46_amber_largerbox',
    'URRO_pH7_dens_pH70_amber_largerbox',
    '4NPQ_pH70_amber_largerbox',
]

# Root directory holding the raw systems, and root directory for the "fixed" output.
# Both end in '/' because paths below are built by plain string concatenation.
default_load_location = '/home/scottzhuang/eriklab/'
default_save_location = '/home/scottzhuang/pdc/'

# Frame stride applied to the raw trajectory, and number of replicates per system.
default_skip = 25
default_rep = 4

In [9]:
# Per-system settings stored as parallel lists: index i of every list
# describes the system traj_notes[i].
traj_note_dic = {
    'traj_note': traj_notes,
    # Raw data of each system is read from <default_load_location><traj_note>/production/.
    'load_location': [default_load_location + note + '/production/' for note in traj_notes],
    'save_location': [default_save_location] * len(traj_notes),
    'skip': [default_skip] * len(traj_notes),
    # Follow the configured replicate count instead of a hard-coded 4, so that
    # changing default_rep actually takes effect.
    'rep': [default_rep] * len(traj_notes),
}

In [10]:
traj_note_dic

{'load_location': ['/home/scottzhuang/eriklab/URRO_pH3_dens_pH46_amber_largerbox/production/',
  '/home/scottzhuang/eriklab/URRO_pH3_dens_pH70_amber_largerbox/production/',
  '/home/scottzhuang/eriklab/URRO_pH5_dens_pH46_amber_largerbox/production/',
  '/home/scottzhuang/eriklab/URRO_pH5_dens_pH70_amber_largerbox/production/',
  '/home/scottzhuang/eriklab/URRO_pH7_dens_pH46_amber_largerbox/production/',
  '/home/scottzhuang/eriklab/URRO_pH7_dens_pH70_amber_largerbox/production/',
  '/home/scottzhuang/eriklab/4NPQ_pH70_amber_largerbox/production/'],
 'rep': [4, 4, 4, 4, 4, 4, 4],
 'save_location': ['/home/scottzhuang/pdc/',
  '/home/scottzhuang/pdc/',
  '/home/scottzhuang/pdc/',
  '/home/scottzhuang/pdc/',
  '/home/scottzhuang/pdc/',
  '/home/scottzhuang/pdc/',
  '/home/scottzhuang/pdc/'],
 'skip': [25, 25, 25, 25, 25, 25, 25],
 'traj_note': ['URRO_pH3_dens_pH46_amber_largerbox',
  'URRO_pH3_dens_pH70_amber_largerbox',
  'URRO_pH5_dens_pH46_amber_largerbox',
  'URRO_pH5_dens_pH70_amber_

- create the directories in `save_location`
    - e.g. '/home/scottzhuang/pdc/URRO_pH3_dens_pH46_amber_largerbox/rep1'

In [None]:
# Create <save_location><traj_note>/rep1 ... rep<N> for every system, where N
# is that system's entry in traj_note_dic['rep'].
for i, traj_note in enumerate(traj_note_dic['traj_note']):
    system_dir = traj_note_dic['save_location'][i] + traj_note
    if os.path.isdir(system_dir):
        # Purely informational: missing replicate folders are still created below.
        print(traj_note, 'exists')
    for rep in range(1, traj_note_dic['rep'][i] + 1):
        # makedirs also creates system_dir itself; exist_ok makes re-running the
        # cell safe. Genuine failures (e.g. permissions) now raise instead of
        # being reported as "exists".
        os.makedirs(system_dir + '/rep' + str(rep), exist_ok=True)

In [19]:
def _run_trjconv(**options):
    """Build a ``Trjconv`` command from *options* and run it immediately."""
    Trjconv(**options).run()


def pre_fixing(traj_note, save_location, load_location, rep_num, skip):
    """Produce the "fixed" structure and trajectory files for one replicate.

    Inputs are ``<load_location>rep<rep_num>/md.tpr`` and ``.../md.xtc``;
    everything is written to ``<save_location><traj_note>/rep<rep_num>``
    (that directory must already exist):

    * ``md.<sel>.gro`` and ``md.<sel>.pdb`` -- one frame written with
      ``dump=10`` for each selection,
    * ``md.skip<skip>.<sel>.xtc`` -- the strided trajectory after the
      ``pbc='whole'`` -> ``pbc='nojump'`` -> ``pbc='res'`` -> ``fit='rot+trans'``
      chain of trjconv passes,
    * empty ``analysis/``, ``hbond_analysis/`` and ``pore_analysis/`` folders.

    ``<sel>`` is one of ``system``, ``protein``, ``backbone``, ``ca``.

    Parameters
    ----------
    traj_note : str
        System name; used as the sub-directory of ``save_location``.
    save_location, load_location : str
        Root directories; both are expected to end with ``/`` because paths
        are built by string concatenation.
    rep_num : int or str
        Replicate number (converted with ``str``).
    skip : int
        Frame stride passed to trjconv as ``skip``.

    Returns
    -------
    None
        Problems are reported with ``print`` rather than raised, so that one
        bad replicate does not abort a batch of jobs.
    """
    rep_dir = 'rep' + str(rep_num)
    tpr_file = load_location + rep_dir + '/md.tpr'
    xtc_file = load_location + rep_dir + '/md.xtc'
    output_file = save_location + traj_note + '/' + rep_dir

    # Report missing input explicitly instead of inferring it from any failure.
    if not (os.path.isfile(tpr_file) and os.path.isfile(xtc_file)):
        print(load_location + ' not found.')
        return

    # (file label, index group sent to trjconv's selection prompt).
    # NOTE(review): in the stock GROMACS index groups 4 is "Backbone" and 5 is
    # "MainChain"; the files labelled "backbone" use group 5 -- confirm intent.
    selections = (('system', '0'), ('protein', '1'), ('backbone', '5'), ('ca', '3'))

    try:
        # Single-frame structures in both formats.
        for extension in ('gro', 'pdb'):
            for label, group in selections:
                _run_trjconv(s=tpr_file, f=xtc_file, dump=10,
                             o=output_file + '/md.' + label + '.' + extension,
                             input=group)

        # Chain of trjconv passes; each one reads the previous pass's output.
        _run_trjconv(s=tpr_file, f=xtc_file, skip=skip,
                     o=output_file + '/skip.xtc', input='0')
        _run_trjconv(s=tpr_file, f=output_file + '/skip.xtc',
                     o=output_file + '/whole.xtc', input='0', pbc='whole')
        # For the next three passes groups '3' then '0' are supplied as the
        # answers to trjconv's successive group prompts.
        _run_trjconv(s=tpr_file, f=output_file + '/whole.xtc', pbc='nojump',
                     center='yes', o=output_file + '/nojump.xtc', input=('3', '0'))
        _run_trjconv(s=tpr_file, f=output_file + '/nojump.xtc', pbc='res',
                     ur='rect', center='yes', o=output_file + '/center.xtc',
                     input=('3', '0'))
        _run_trjconv(s=tpr_file, f=output_file + '/center.xtc', fit='rot+trans',
                     o=output_file + '/rotrans.xtc', input=('3', '0'))

        # Split the fully processed trajectory into the per-selection outputs.
        final_outputs = set()
        for label, group in (('ca', '3'), ('system', '0'),
                             ('protein', '1'), ('backbone', '5')):
            target = output_file + '/md.skip' + str(skip) + '.' + label + '.xtc'
            _run_trjconv(s=tpr_file, f=output_file + '/rotrans.xtc',
                         o=target, input=group)
            final_outputs.add(os.path.normpath(target))

        # Remove intermediates and GROMACS '#...#' backup copies.
        # '*skip5*' presumably clears leftovers of an earlier skip=5 run
        # (TODO confirm); it also matches md.skip5.* / md.skip5x.*, so files
        # written just above are explicitly protected from deletion.
        for pattern in ('center*', 'rotrans*', 'nojump*', 'whole*',
                        'skip*', '*skip5*', '#*'):
            for path in glob.glob(output_file + '/' + pattern):
                if os.path.normpath(path) not in final_outputs:
                    os.remove(path)

        # Folders used by later analysis steps; fine if they already exist.
        for folder in ('analysis', 'hbond_analysis', 'pore_analysis'):
            os.makedirs(output_file + '/' + folder + '/', exist_ok=True)
    except Exception as err:
        # Keep the best-effort behaviour, but say what actually went wrong
        # instead of always claiming the input was not found.
        print(load_location + rep_dir + ' failed: ' + repr(err))

In [23]:
from joblib import Parallel, delayed
import multiprocessing
num_cores = multiprocessing.cpu_count()
# One pre_fixing job per (system, replicate) pair. The replicate count comes
# from each system's 'rep' entry rather than a hard-coded 4, so the job list
# stays correct when the number of replicates changes.
Parallel(n_jobs=num_cores)(
    delayed(pre_fixing)(traj_note=traj_note,
                        save_location=traj_note_dic['save_location'][i],
                        load_location=traj_note_dic['load_location'][i],
                        skip=traj_note_dic['skip'][i],
                        rep_num=str(rep))
    for i, traj_note in enumerate(traj_note_dic['traj_note'])
    for rep in range(1, traj_note_dic['rep'][i] + 1))

[None, None, None, None]

- append all replicates of one system together, for easier visualization

In [79]:
def append_replicates(traj_note,location,skip=default_skip,rep_num = default_rep):
    """Concatenate the per-replicate trajectories of one system.

    For each of the ``ca``, ``protein`` and ``system`` selections, loads
    ``<location><traj_note>/rep<k>/md.skip<skip>.<sel>.xtc`` (topology
    ``md.<sel>.pdb`` from the same folder) for k = 1..rep_num, joins them in
    replicate order, and writes ``<traj_note>.<sel>.pdb`` (first frame only)
    and ``<traj_note>.<sel>.xtc`` into ``<location><traj_note>/``.

    Nothing is loaded or written when ``rep_num`` is 1 or less.

    NOTE(review): relies on a module-level name ``md`` (presumably
    ``import mdtraj as md``) that is not imported in the visible cells -- confirm.
    """
    if rep_num <= 1:
        return

    system_dir = location + traj_note + '/'
    parts = ('ca', 'protein', 'system')

    def load_part(rep, part):
        # Trajectory and matching topology of one selection in one replicate folder.
        prefix = system_dir + 'rep' + str(rep) + '/' + 'md'
        return md.load(prefix + '.skip' + str(skip) + '.' + part + '.xtc',
                       top=prefix + '.' + part + '.pdb')

    # Start from replicate 1, then append replicates 2..rep_num in order.
    combined = {part: load_part(1, part) for part in parts}
    for rep in range(2, rep_num + 1):
        for part in parts:
            combined[part] = combined[part].join(load_part(rep, part))

    for part in parts:
        merged_prefix = system_dir + traj_note + '.' + part
        combined[part][0].save_pdb(merged_prefix + '.pdb')
        combined[part].save_xtc(merged_prefix + '.xtc')

In [None]:
# Merge the replicates of every configured system, using that system's own
# save location, frame stride and replicate count.
for idx, note in enumerate(traj_note_dic['traj_note']):
    append_replicates(
        traj_note=note,
        location=traj_note_dic['save_location'][idx],
        skip=traj_note_dic['skip'][idx],
        rep_num=traj_note_dic['rep'][idx],
    )