In [1]:
# import packages and tools
import sys, os
import numpy as np
from numpy.linalg import eig
from tools_box import *
from estimators import *

In [2]:
# decide bin_min, bin_max and number of states
nstates = 100
bin_min = 0.0
bin_max = 3.1

In [3]:
# Clustering original data based on the bin
if (1):
    print "clustering..."
    cm = [] # create an empty count matrix
    for i in range(20):     # 20 parallel trajectories available
        t = np.load('../distance_%d.npy'%i)     # load pre-computed COM distances
        print i
        a = cluster_bin(t,bin_min,bin_max, nstates)    # clustering, equivalent to np.digitize
        cm.append(a)   
    cm = np.array(cm)   # get a numpy array format for this matrix
    np.save('clustered_matrix_%d.npy'%nstates,cm)  # saving
    print cm, np.array(cm).shape    # make sure the shape is (n,k) where n is number of trajectories, k is number of snapshots, in this case it should be (20,60001) 
    print "Done!"


clustering...
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
[[72 54 50 ..., 24 28 20]
 [72 48 42 ..., 45 76 39]
 [72 67 68 ...,  0  1  0]
 ..., 
 [72 31  1 ..., 40 40 18]
 [72 50 50 ...,  1  0  1]
 [72 55 57 ...,  0  1  1]] (20, 60001)
Done!


In [4]:
# create implied timescale files
fout = open('implied_timescales.dat','w')
header = '# tau(ns)\tround\timplied_timescale(ns)'
#print header
fout.write(header+'\n')


In [5]:
# construct raw transition count matrix
bootstrap = True
r = 2   # 2 rounds of bootstrapping
sm = 4  # 4 slow motions to compute



In [6]:
if (1):
    if bootstrap:
        for i in range(r):
            new_cm = []   # create a new count matrix
            traj = np.random.choice(cm.shape[0],cm.shape[0])   # randomly pick up 20 trajectories (sample with replacement)
            print traj
            for j in traj:
                new_cm.append(cm[j])  
            new_cm = np.array(new_cm) # make this new matrix in numpy array format

            print "constructing transition count matrix..."
            tau_values = [1,10,100,150,200,300,400,500,600,1000,1500,2000,2500,3000]  # lagtimes
            for tau in tau_values:
                tau_in_ns = tau*0.1   # tau is in unit of ns
                print "lagtime = ", tau_in_ns,'ns'
                tcm = construct_raw_tcm(new_cm,tau,nstates)  # construct transition count matrix based on tau
                tcm[tcm==0.] = 0.0000001   # add pseudocount if there is any zero count
                np.save('raw_tcm_tau%d_r%d.npy'%(tau,i),tcm)    # saving
                # construct transition probability matrix
                print "constructing transition probability matrix..."
#                tpm = construct_tpm(tcm)
                tpm = MLE_tProb_reversible(tcm)    # using MLE estimators
                np.save('tpm_%d.npy'%tau,tpm)
                print "computing eigenvalues and eigenvectors..."
                try:
                    mu, eigenvectors = eig(tpm.transpose())    # compute eigenvalues and eigenvectors
                    #print "mu", mu, "eigenvectors",eigenvectors
                    for j in range(sm):
                        implied_timescale_in_ns = -1.0*tau_in_ns/np.log(mu[j+1])   
                        outstr = '%6.3f\t%d\t%6.3f'%(tau_in_ns, i, implied_timescale_in_ns)  # implied timescales for specific tau for ith round bootstrap
                        #print 'writing data...'
                        fout.write(outstr+'\n')
                except:
                    'Skipped'
                #fout.write(outstr+'\n')
        print "Done!"
        fout.close()
        
    # if bootstrap is not used
    else:
        k = 1    # only 1 round 
        print "constructing transition count matrix..."
        tau_values = [1,10,100,150,200,300,400,500,600,1000,1500,2000,2500,3000]
        for tau in tau_values:
            tau_in_ns = tau*0.1
            print "lagtime = ", tau_in_ns,'ns'
            cm = 'clustered_matrix.npy'
            tcm = construct_raw_tcm(cm,tau,nstates)
            tcm[tcm==0.] = 0.0000001 
            np.save('raw_tcm_%d.npy'%tau,tcm)
            # construct transition probability matrix
            print "constructing transition probability matrix..."
#            tpm = construct_tpm(tcm)
            tpm = MLE_tProb_reversible(tcm)
            np.save('tpm_%d.npy'%tau,tpm)
            print "computing eigenvalues and eigenvectors..."
            try:
                mu, eigenvectors = eig(tpm.transpose())
              #  print "mu", mu, "eigenvectors",eigenvectors
                for j in range(sm):
                    implied_timescale_in_ns = -1.0*tau_in_ns/np.log(mu[j+1])
                    outstr = '%6.3f\t%d\t%6.3f'%(tau_in_ns, k, implied_timescale_in_ns)
                    fout.write(outstr+'\n')
            except:
                'Skipped'
        print "Done!"
        fout.close()

[ 6  2 16  9  6  4 15  2  7 12 18  2  0  3  9  9  1  7 14  3]
constructing transition count matrix...
lagtime =  0.1 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  1.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  10.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  15.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  20.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  30.0 ns
constructing transition pr




writing data...
writing data...
writing data...
writing data...
lagtime =  40.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  50.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  60.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  100.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  150.0 ns
constructing transition probability matrix...
computing eigenvalues and eigenvectors...
writing data...
writing data...
writing data...
writing data...
lagtime =  200.0 ns
constructing transition probability matrix...
computing ei

In [10]:
# show implied timescales file
with open("implied_timescales.dat") as f:
        lines=f.readlines()
for line in lines:
    print line


# tau(ns)	round	implied_timescale(ns)

 0.100	0	 3.590

 0.100	0	 0.072

 0.100	0	 0.049

 0.100	0	 0.037

 1.000	0	 3.627

 1.000	0	 0.490

 1.000	0	 0.159

 1.000	0	 0.158

10.000	0	 4.901

10.000	0	 3.668

10.000	0	 2.449

10.000	0	 1.541

15.000	0	 7.352

15.000	0	 2.327

15.000	0	 3.801

15.000	0	 3.620

20.000	0	 9.803

20.000	0	 3.081

20.000	0	 3.077

20.000	0	 3.068

30.000	0	14.704

30.000	0	 7.291

30.000	0	   nan

30.000	0	   nan

40.000	0	19.606

40.000	0	 9.755

40.000	0	 6.118

40.000	0	 9.527

50.000	0	24.507

50.000	0	12.505

50.000	0	   nan

50.000	0	12.131

60.000	0	29.409

60.000	0	 9.270

60.000	0	14.227

60.000	0	14.138

100.000	0	49.014

100.000	0	25.306

100.000	0	15.514

100.000	0	24.967

150.000	0	73.522

150.000	0	   nan

150.000	0	35.571

150.000	0	   nan

200.000	0	98.029

200.000	0	   nan

200.000	0	52.937

200.000	0	48.220

250.000	0	122.536

250.000	0	38.481

250.000	0	38.308

250.000	0	38.068

300.000	0	147.043

300.000	0	76.236

300.000	0	   nan

300.0

In [None]:
# plot implied timescales
print "plotting figures..."
from matplotlib import pyplot as plt
%matplotlib inline

color=['red','blue','green','magenta']
if bootstrap:
    data = np.loadtxt('implied_timescales.dat')
    tau_values = [1,10,100,150,200,300,400,500,600,1000,1500,2000,2500,3000]  # double check if this tau values are the same as above shown
    lagtime=[[] for i in range(sm)]
    implied=[[] for i in range(sm)]
    err=[[] for i in range(sm)]
    for j in range(sm):
        for i in range(len(tau_values)):    # if this part is too hard to understand, I suggest load the implied timescales file in an ipython window and do these operation then it will be clear
            lagtime[j].append(data[(j+i*sm)::len(tau_values)*sm][0][0])  # append lagtime for each slow motion
            implied[j].append(np.mean(data[(j+i*sm)::len(tau_values)*sm][::,2]))  # append averaged implied timescale between different rounds bootstrapping for each lagtime
            err[j].append(np.std(data[(j+i*sm)::len(tau_values)*sm][::,2]))      # append standard deviation of implied timescales between different round bootstrapping for each lagtime

    for i in range(sm):
        plt.plot(lagtime[i],implied[i],color=color[i])
        plt.fill_between(lagtime[i],np.array(implied[i])+np.array(err[i]),np.array(implied[i])-np.array(err[i]),color=color[i],alpha=0.2)
        plt.yscale('log')
        plt.xlabel('lag time (ns)')
        plt.ylabel('implied timescale (ns)')
        plt.savefig('implied_timescale_%d.pdf'%nstates)
    print 'Done!'

else:
    data = np.loadtxt('implied_timescales.dat')
    lagtime=[[] for i in range(sm)]
    implied=[[] for i in range(sm)]
    err=[[] for i in range(sm)]
    for j in range(sm):
        for i in range(len(tau_values)):
            lagtime[j].append(data[(j+i*sm)::len(tau_values)*sm][0][0])
            implied[j].append(np.mean(data[(j+i*sm)::len(tau_values)*sm][::,2]))
            err[j].append(np.std(data[(j+i*sm)::len(tau_values)*sm][::,2]))

    for i in range(sm):
        plt.plot(lagtime[i],implied[i],color=color[i])
        plt.yscale('log')
        plt.xlabel('lag time (ns)')
        plt.ylabel('implied timescale (ns)')
        plt.savefig('implied_timescale_%d.pdf'%nstates)
    print 'Done!'
