<h1>Mean First Passage Time</h1>

In [None]:
import numpy as np
import os

# Create the output directory './Analysis'; exist_ok=True makes this a no-op
# (instead of an error) when the directory is already there.
os.makedirs('./Analysis',exist_ok=True)

def move_row_col(matrix, i):
    """
    Return a copy of a 2-D array with row ``i`` and column ``i`` removed.

    Parameters
    ----------
    matrix: ndarray
        a 2-D array
    i: int
        index of the row and of the column to drop

    Return
    ----------
    move_matrix: ndarray
        the input with one row and one column fewer; the input is not modified
    """
    # Drop the row first (axis=0), then the column (axis=1) of the result.
    return np.delete(np.delete(matrix, i, axis=0), i, axis=1)


def mfpt(tpm,lag_time):
    """
    Calculating MFPT from a N*N transition probability matrix.
    Negative entries of the input are set to zero and the matrix is then
    row-normalized (every row sums to one) before calculation.

    For every target state j the MFPTs of all other states i are obtained
    by solving the linear system

        m[i, j] = lag_time + sum_{k != j} T[i, k] * m[k, j]      (i != j)

    with m[j, j] = 0.

    Parameters
    ----------
    tpm: ndarray
        a N*N transition probability matrix
    lag_time: int
        The lag time for outputting TPM.
        The MFPT calculated will have the same unit as lag time.

    Return
    ----------
    mfpt: ndarray
        a num_state*num_state ndarray containing MFPT,
        which the element at the i-th row and j-th column
        represent MFPT from i-th state to j-th state.
        The diagonal is zero. The matrix is also printed to stdout.

    Raises
    ----------
    ValueError
        if tpm is not a square 2-D matrix, or if a row sums to zero after
        negative entries were clipped (it cannot be normalized).
    numpy.linalg.LinAlgError
        if the linear system of some target state is singular.

    References
    ----------
    ..[1]  Singhal, N., Vijay P.S.(2005).Error analysis and efficient sampling in Markovian state models for molecular dynamics.
           J. Chem. Phys. 123, 204909

    """
    tpm = np.asarray(tpm, dtype=float)
    if tpm.ndim != 2 or tpm.shape[0] != tpm.shape[1]:
        raise ValueError("tpm must be a square 2-D matrix, got shape %s" % (tpm.shape,))
    num_state = tpm.shape[0]

    #normalizing TPM: clip negative entries, then divide each row by its own sum
    tpm = np.where(tpm < 0, 0, tpm)
    row_sums = tpm.sum(axis=1, keepdims=True)
    if np.any(row_sums == 0):
        raise ValueError("tpm has a row that sums to zero and cannot be row-normalized")
    tpm_norm = tpm / row_sums

    #mfpt calculation
    # The systems of different target states are independent, so each one is
    # solved on its own (num_state-1 unknowns) rather than as one large
    # block-diagonal system.
    result = np.zeros((num_state, num_state))
    identity = np.identity(num_state - 1)
    rhs = np.full(num_state - 1, -float(lag_time))
    for target in range(num_state):
        # Boolean mask selecting every state except the target.
        others = np.arange(num_state) != target
        # Transition matrix restricted to the non-target states.
        sub_tpm = tpm_norm[np.ix_(others, others)]
        # (T_sub - I) m = -lag_time ; the solution is column `target` of the
        # result, i.e. result[i, target] is the MFPT from i to target.
        result[others, target] = np.linalg.solve(sub_tpm - identity, rhs)
    print(result)
    return result

# Settings for the MFPT calculation.
num_state = 4
lag_time = 200  #200 steps and unit=0.1ps

# Each row of the text file is one flattened num_state*num_state TPM;
# restore the matrices and keep only the last one in the file.
TPM = np.loadtxt('./qMSM/qMSM_Propagate_TPM.txt')
TPM = TPM.reshape((len(TPM), num_state, num_state))[-1]

sample_mfpt = mfpt(TPM, lag_time)



<h1>Macrostate Sampling</h1>

In [10]:
import numpy as np
import mdtraj as md
import sys
import optparse
import linecache
from glob import glob
import re 

def sample_macrostate(trajDir="./trajs/",topfile = "./trajs/ala2.pdb",
                      lumped_assignment='./qMSM/lumping_assignment.npy',
                      analysis_dir='./Analysis/',no_of_sample=10,
                      assignment_shape=(100,100001)):
    """
    Sample structures from macrostate randomly and save as PDB files.
    An index file (sampled_pdb_list.txt) containing the origin of the
    conformations will also be generated in analysis_dir.

    Frames are drawn with numpy.random.choice using its default settings,
    i.e. with replacement, so the same frame may be picked more than once.

    Parameters
    ----------
    trajDir : str, Default="./trajs/"
        Directory containing the MD trajectories (*.xtc). It is concatenated
        directly with the file pattern, so it must end with a path separator.
        Trajectories are ordered by the integer formed from all digits in
        their path; a path without any digit raises ValueError.

    topfile : str, Default="./trajs/ala2.pdb"
        Path of the topology file for the MD trajectories

    lumped_assignment: str, Default='./qMSM/lumping_assignment.npy'
        Path of the .npy file containing the macrostate assignment

    analysis_dir: str , Default='./Analysis/'
        Directory for saving the output. It is concatenated directly with
        the output file names, so it must end with a path separator.

    no_of_sample: int, Default=10
        Number of structures sampled from each macrostate

    assignment_shape: tuple of int, Default=(100,100001)
        Shape the loaded assignment is reshaped to. The first index is used
        as the position in the sorted trajectory list and the second index
        as the frame number within that trajectory.

    """
    trajlist=glob(trajDir+"*.xtc")
    # Raw string: '\D' in a normal string literal is an invalid escape sequence.
    trajlist.sort(key=lambda f: int(re.sub(r'\D', '', f)))

    macro_assignment=np.load(lumped_assignment)
    macro_assignment=np.reshape(macro_assignment,assignment_shape)
    states=np.unique(macro_assignment)

    # The context manager guarantees the index file is flushed and closed,
    # even if loading or saving a frame fails part-way through.
    with open("{}sampled_pdb_list.txt".format(analysis_dir),"wt") as out:
        out.write("macro_state\toutput_filename\ttraj_index\tframe_index\n")
        for i in states:
            # Every row of b is a (trajectory index, frame index) pair in state i.
            b=np.argwhere(macro_assignment==i)
            random_index=np.random.choice(b.shape[0],size=no_of_sample)
            b=b[random_index]
            for j,n in enumerate(b):
                output_filename=analysis_dir+"macro%d_sample%d.pdb" % (i,j)
                out.write(("%f\t%s\t%s\t%d\n" % (i, str(output_filename), trajlist[n[0]],  n[1])))
                traj = md.load_frame(trajlist[n[0]], n[1] , top=topfile)
                traj.save_pdb(output_filename)

        
# Inputs: trajectory directory, topology file and macrostate assignment.
trajDir = "./trajs/"
topfile = trajDir + "ala2.pdb"
lumped_assignment = './qMSM/lumping_assignment.npy'

# Outputs: destination directory and number of structures per macrostate.
analysis_dir = './Analysis/'
no_of_sample = 10

sample_macrostate(
    trajDir=trajDir,
    topfile=topfile,
    lumped_assignment=lumped_assignment,
    analysis_dir=analysis_dir,
    no_of_sample=no_of_sample,
)