In [None]:
from msmbuilder.io import load_trajs, save_trajs, save_generic
from msmbuilder.decomposition import tICA
import numpy as np
import os

def _load_feature_trajs(featdir, no_of_traj):
    """Load the per-trajectory feature arrays ``<featdir>/features/<i>.npy``."""
    # Kept as a plain list: stacking with np.array() fails (or yields an
    # object array) as soon as two trajectories have different lengths.
    return [np.load(os.path.join(featdir, "features", "{}.npy".format(i)))
            for i in range(no_of_traj)]


def _load_column_index(index_dir, nth, lag, use_sample):
    """Read the Spectral oASIS column indices for one (feature size, lag) pair."""
    fname = "feature_column{}_ticalag_{}.txt".format(nth, lag)
    if use_sample:
        # The provided sample output lives in an "oASIS_sample" sub-folder.
        path = os.path.join(index_dir, "oASIS_sample", fname)
    else:
        path = os.path.join(index_dir, fname)
    return np.loadtxt(path, dtype=int)


def run_tica(featdir,no_of_traj,ticadir,index_dir=None,use_sample=True):
    """
    Run tICA on features selected by Spectral oASIS and save the output.

    For every hard-coded (feature size, tICA lag time) combination the
    selected feature columns are extracted from each trajectory, a 10-component
    kinetic-mapping tICA model is fitted on all trajectories, and the
    transformed trajectories plus the fitted model are written to
    ``<ticadir>/<feature size>/tica_lag<lag/5>/``.

    Parameters
    ----------
    featdir : string
        Directory of full features; trajectories are read from
        ``<featdir>/features/<i>.npy`` for ``i`` in ``range(no_of_traj)``.
    no_of_traj : int
        Number of trajectories from MD simulation
    ticadir : string
        Directory for saving tICA output

    Optional Parameters
    -------------------
    use_sample: Bool, Default=True
        Whether to use our provided output from Spectral oASIS for tICA.
        When true, the column index files are read from
        ``<ticadir>/oASIS_sample/`` and ``index_dir`` is ignored.

    index_dir: str, Default=ticadir
        Directory for the column index from Spectral oASIS
        (required when ``use_sample`` is false).

    Raises
    ------
    ValueError
        If ``use_sample`` is false and no ``index_dir`` is given.
    """
    if index_dir is None and not use_sample:
        raise ValueError("Please provide the directories for your oASIS ouput, or set use_sample=True")
    if use_sample:
        index_dir = ticadir

    #Setting up the parameters
    no_of_features=[800,1000,'Full']
    # the tICA lag time(s) to use with each feature size above
    tica_lagtime=[[1500],[1250,1500],[1500]]

    #Load Data
    pairwise_distance = _load_feature_trajs(featdir, no_of_traj)

    for nth, lags in zip(no_of_features, tica_lagtime):
        for lag in lags:
            # Select the feature columns ('Full' keeps every column).
            if nth == 'Full':
                ftrajs = dict(enumerate(pairwise_distance))
            else:
                columns = _load_column_index(index_dir, nth, lag, use_sample)
                ftrajs = {i: traj[:, columns]
                          for i, traj in enumerate(pairwise_distance)}

            ## Fit
            tica = tICA(n_components=10, lag_time=lag, kinetic_mapping=True)
            tica.fit(list(ftrajs.values()))

            ## Transform
            ttrajs = {k: tica.partial_transform(v) for k, v in ftrajs.items()}

            ## Save
            # NOTE(review): the lag is divided by 5 for the folder name --
            # presumably a unit conversion of the lag time; confirm.
            outdir = os.path.join(ticadir, str(nth), "tica_lag{}".format(lag // 5))
            if not os.path.isdir(outdir):
                os.makedirs(outdir)
            for key, ttraj in ttrajs.items():
                np.save(os.path.join(outdir, "{}.npy".format(key)), ttraj)
            save_generic(tica, os.path.join(outdir, "tica.pickl"))
            
# This cell runs tICA with our sample output from Spectral oASIS
# (read from "<ticadir>oASIS_sample/").
# If you want to use your own output from Spectral oASIS instead, call
# run_tica(featdir, no_of_traj, ticadir, index_dir='dir_your_data', use_sample=False)
ticadir="./TICA/"
featdir="./Featurization/"
no_of_traj=84
# use_sample=False without an index_dir makes run_tica raise immediately,
# so the sample output is requested explicitly here.
run_tica(featdir,no_of_traj,ticadir,use_sample=True)

In [None]:
from msmbuilder.io.sampling import sample_dimension