# Import libraries

In [None]:
import pyemma
pyemma.__version__
import os
import itertools
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import glob
import MDAnalysis as mda
import MDAnalysis.transformations
from tqdm import tqdm_notebook as tqdm
import pyemma.coordinates as coor
import warnings

# conduct PCA

In [None]:
# Topology/reference structure shared by every trajectory.
top = "./gerbil_main/input/em.gro"

# Gather all trajectory files in sorted order; the position of a file in this
# list is what later cells use as its trajectory index.
all_xtc = glob.glob("gerbil_main/*/*/amd_noPBC.xtc")
all_xtc.sort()

print(top, len(all_xtc))

In [None]:
# Build the PCA input: one row per frame holding the flattened
# (x1, y1, z1, x2, ...) C-alpha coordinates of that frame, collected over
# every trajectory in `all_xtc`.
traj_c_alpha = []
ref = mda.Universe(top)
ref_c_alpha = ref.select_atoms("name CA")
for traj in tqdm(all_xtc):
    u = mda.Universe(top, traj)
    c_alpha = u.select_atoms("name CA")
    # Register an on-the-fly transformation so that every frame is fitted
    # (rotation + translation, mass-weighted) onto the reference C-alpha atoms
    # as it is read.
    transform = mda.transformations.fit_rot_trans(c_alpha, ref_c_alpha, weights="mass")
    u.trajectory.add_transformations(transform)
    for ts in u.trajectory:
        # Iterating advances the reader, so `positions` reflects the current
        # frame. `flatten()` collapses the (n_atoms, 3) array in NumPy and
        # returns an independent copy, instead of chaining the coordinates
        # one element at a time in Python.
        traj_c_alpha.append(c_alpha.positions.flatten())
print(np.shape(traj_c_alpha))

In [None]:
from sklearn.decomposition import PCA

# Fit a PCA with default settings on the flattened C-alpha coordinates of all
# frames; `fit` returns the estimator itself, so it can be bound directly.
pca = PCA().fit(traj_c_alpha)

# Project the same frames onto the fitted principal components.
init_pca = pca.transform(traj_c_alpha)

In [None]:
# Cumulative explained-variance ratio of the first 10 components, with a
# leading 0 so the curve starts at "no components".
variance_ratio = pca.explained_variance_ratio_
cumulative_ratio = [0, *np.cumsum(variance_ratio)[:10]]
plt.plot(
    cumulative_ratio,
    linewidth=0,
    marker='o',
    markersize=3,
    color='black',
)
print(variance_ratio[:10])

# Projection onto the 2D PC subspace

In [None]:
# Project every trajectory onto the first two principal components and write
# the result as a two-column text file `RC.txt` next to the trajectory file.
for traj in tqdm(all_xtc):
    # Directory of the trajectory; derived with os.path so that only the file
    # name is dropped, whatever the rest of the path contains.
    place = os.path.dirname(traj)
    u = mda.Universe(top, traj)
    c_alpha = u.select_atoms("name CA")
    # Fit each frame onto the reference C-alpha atoms as it is read.
    transform = mda.transformations.fit_rot_trans(c_alpha, ref_c_alpha, weights="mass")
    u.trajectory.add_transformations(transform)
    # Use a loop-local name: `traj_c_alpha` holds the full PCA training set and
    # must not be overwritten here, otherwise re-running the fit cell would
    # silently fit on the last trajectory only.
    frame_coords = [c_alpha.positions.flatten() for ts in u.trajectory]
    pca_result = pca.transform(frame_coords)
    # Columns: PC1, PC2 (one row per frame).
    np.savetxt(os.path.join(place, "RC.txt"), pca_result[:, :2])

In [None]:
# Reload the per-trajectory 2D projections. The files are looked up from
# `all_xtc` (instead of an independent glob) so that entry i is guaranteed to
# belong to trajectory i; later cells rely on this when mapping cluster
# samples back to trajectory files. `ndmin=2` keeps a single-frame file as a
# (1, 2) array instead of a 1-D vector.
all_reader = [
    np.loadtxt(os.path.join(os.path.dirname(xtc), "RC.txt"), ndmin=2)
    for xtc in all_xtc
]
all_XYZ = all_reader

# Stack once and split into the PC1 / PC2 columns used for plotting.
stacked = np.vstack(all_XYZ)
all_X = stacked[:, 0]
all_Y = stacked[:, 1]
print(np.shape(all_X), np.shape(all_Y))

In [None]:
def conduct_pca(name, reference="gerbil_main/input/em.gro"):
    """Project the structure(s) in *name* onto the first two principal components.

    The file is loaded both as topology and as coordinates, every frame is
    fitted (mass-weighted rotation + translation) onto the C-alpha atoms of
    *reference*, and the flattened C-alpha coordinates are transformed with
    the module-level fitted ``pca`` object.

    Args:
        name: Path to a structure file readable by MDAnalysis as both
            topology and coordinates (the notebook passes ``.gro`` files).
        reference: Path to the reference structure used for the fit.
            Defaults to the structure used elsewhere in this notebook.

    Returns:
        A two-element list ``[pc1, pc2]``; each element is a 1-D array with
        one value per frame.
    """
    ref = mda.Universe(reference)
    ref_c_alpha = ref.select_atoms("name CA")
    u = mda.Universe(name, name)
    c_alpha = u.select_atoms("name CA")
    transform = mda.transformations.fit_rot_trans(c_alpha, ref_c_alpha, weights="mass")
    u.trajectory.add_transformations(transform)
    # One flattened (x1, y1, z1, x2, ...) row per frame; flatten() works in
    # NumPy and returns a copy that is independent of the reader's buffer.
    traj_c_alpha = [c_alpha.positions.flatten() for ts in u.trajectory]
    pca_result = pca.transform(traj_c_alpha)
    return [pca_result[:, 0], pca_result[:, 1]]

In [None]:
# Reference structures to be marked on the PC1/PC2 plots.
op = "./gerbil_main/input/open_nowater.gro"
cl = "./gerbil_main/input/closed_nowater.gro"

# Project both structures (open first, then closed) onto PC1/PC2.
op_pca, cl_pca = (conduct_pca(structure) for structure in (op, cl))

In [None]:
# Scatter of all frames in the PC1/PC2 plane with the open and closed
# reference structures highlighted; saved as PCA.png.
matplotlib.rcParams.update({'font.size': 15})
# Create exactly one figure and keep its handle in `fig`; calling
# plt.figure() twice would leave an extra empty figure behind.
fig = plt.figure(figsize=(6,5))
# High zorder keeps the "open" marker on top of the frame cloud.
plt.scatter(op_pca[0],op_pca[1],c="red",s=50,label="open",zorder=999)
plt.scatter(cl_pca[0],cl_pca[1],c="blue",s=50,label="closed")
plt.plot(all_X,all_Y,marker="o",linestyle='None',ms=0.5,alpha=0.7)
plt.xlabel("PC1 [Å]")
plt.ylabel("PC2 [Å]")
plt.legend()
plt.savefig("PCA.png",dpi=300)

# clustering with regspace

In [None]:
# Regular-space clustering of the 2D projections (minimum center distance
# 4.3, at most 200 centers), followed by a plot of frames, cluster centers
# and the open/closed reference structures; saved as PCA_cluster.png.
clustering = coor.cluster_regspace(all_XYZ,dmin=4.3, max_centers=200)
# Discrete trajectories: cluster index of every frame, one array per trajectory.
dtrajs = clustering.dtrajs
cc_x = clustering.clustercenters[:,0]
cc_y = clustering.clustercenters[:,1]
print(np.shape(cc_x),np.shape(cc_y))
plt.figure(figsize=(6,6))
plt.plot(all_X,all_Y,marker="o",linestyle='None',ms=0.7,alpha=0.2)
plt.plot(cc_x,cc_y,marker="o",linestyle='None',ms=3.0)
plt.scatter(op_pca[0],op_pca[1],c="red",s=50,label="open",zorder=999)
plt.scatter(cl_pca[0],cl_pca[1],c="blue",s=50,label="closed",zorder=999)
plt.xlabel("PC1 [Å]")
plt.ylabel("PC2 [Å]")
# The reference markers carry labels; draw the legend so they are actually
# shown, as in the plain PCA plot.
plt.legend()
plt.savefig("PCA_cluster.png",dpi=300)

# Convert sampled xtc frames to gro

In [None]:
# Sorted copy of the trajectory file list; indexed below by the trajectory
# index returned from the clustering samples.
traj = list(all_xtc)
traj.sort()
print(len(traj))

In [None]:
# Draw one (trajectory index, frame index) sample for every cluster that was
# actually found. Regspace clustering may return fewer than `max_centers`
# centers, so the count is taken from the result instead of being hard-coded.
cluster_centers = clustering.sample_indexes_by_cluster(range(len(clustering.clustercenters)),1)

In [None]:
# Write the sampled frame of every cluster as
# ./resampling/initial_structures/<cluster number>.gro (numbering starts at 1).
# The featurizer only depends on the topology file, so it is built once and
# shared by all readers instead of being re-created on every iteration.
feat = coor.featurizer("./gerbil_main/input/em.gro")
for index, cc in enumerate(cluster_centers):
    # One sample was drawn per cluster: row 0 holds (trajectory index, frame index).
    traj_index, frame_index = cc[0]
    traj_file = traj[traj_index]
    traj_reader = coor.source(traj_file, features=feat)
    print(traj_file,index)
    # The reader wraps a single file, so the trajectory index inside it is 0.
    coor.save_traj(traj_reader, [[0, frame_index]],f"./resampling/initial_structures/{index+1}.gro")

In [None]:
# IPython shell escape: runs the external script trjconv.sh from the output
# directory. NOTE(review): presumably post-processes the .gro files written
# above; the script is not part of this notebook, so confirm what it does.
! ./resampling/initial_structures/trjconv.sh 