In [None]:
import os
import warnings

import hdbscan
import matplotlib.pyplot as plt
import numpy as np
from pyemma.coordinates import tica
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score, davies_bouldin_score
warnings.filterwarnings("ignore")

In [None]:
# Step 1: load data
# np.load on an .npz archive returns an NpzFile object that keeps the archive's
# file handle open. Using it as a context manager closes the handle as soon as
# the array has been read into memory.
with np.load('tri_sin_phi_data.npz') as loaded:
    tri_sin_phi = loaded['tri_sin_phi']

# Column layout as consumed by this notebook (column 0 is not read here):
#   columns 1-6 -> `data`, the feature matrix handed to tICA
#   columns 7-9 -> phi1, phi2, phi3, drawn on "degrees" axes in the plots
# NOTE(review): the meaning of columns 1-6 is not visible here; judging by the
# array name they are presumably sine-type features of the phi angles — confirm.
data = tri_sin_phi[:, 1:7]
phi1 = tri_sin_phi[:, 7]
phi2 = tri_sin_phi[:, 8]
phi3 = tri_sin_phi[:, 9]

print(f"data shape: {data.shape}")
print(f"tri_sin_phi shape: {tri_sin_phi.shape}")

In [None]:
# ============================================================
# Step 2: Eigenvalue Evolution vs Lag Time
#   scans lag = 10, 60, 110, ..., 460
#   fits tICA (dim=2) on original data
# ============================================================
lags = np.arange(10, 500, 50)

# Collect one eigenvalue vector per lag time, in the same order as `lags`.
eigenvalues_orig = []
for lag in lags:
    model_orig = tica(lag=lag, dim=2)
    model_orig.fit(data)
    eigenvalues_orig.append(model_orig.eigenvalues)

# Stack into a 2-D array: one row per lag, one column per eigenvalue.
eigenvalues_orig = np.array(eigenvalues_orig)

# --- save ---
# Create the output directory if needed. This relies on the standard-library
# `os` module, which has to be imported with the other imports at the top of
# the notebook; without that import this line raises NameError.
os.makedirs('lag', exist_ok=True)
# Text table layout: first column is the lag, the rest are the eigenvalues.
np.savetxt('lag/lag_and_eigenvalues_evolution-orin3.txt',
           np.column_stack((lags, eigenvalues_orig)))

# --- plot: original ---
plt.figure(figsize=(10, 6))
# One curve per eigenvalue index, tracked across all lag times.
for i, eigenvalue_curve in enumerate(eigenvalues_orig.T, start=1):
    plt.plot(lags, eigenvalue_curve, marker='o', label=f'Eigenvalue {i}')
plt.xlabel('Lag Time')
plt.ylabel('Eigenvalues')
plt.title('Eigenvalue Evolution vs Lag Time (Original)')
plt.legend()
plt.grid(True)
plt.savefig('eigenvalues_evolution_original.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Step 3: TICA + K-means
n_clusters = 4

# Reduce the feature matrix to a 2-dimensional tICA projection (lag = 150).
TICA = tica(lag=150, dim=2)
tica_dr = TICA.fit_transform(data)

# Cluster the projected points. The fixed random_state makes the assignment
# reproducible; n_init=20 runs K-means from 20 different initialisations.
kmeans_tica = KMeans(n_clusters=n_clusters, n_init=20, random_state=42)
kmeans_tica.fit(tica_dr)
labels_tica = kmeans_tica.labels_

# Alternative clustering: to use HDBSCAN instead, uncomment the two lines
# below and comment out the KMeans lines above.
# hdbscan_tica = hdbscan.HDBSCAN(min_cluster_size=1000, min_samples=200, core_dist_n_jobs=6, cluster_selection_method='eom', gen_min_span_tree=True)
# labels_tica = hdbscan_tica.fit_predict(tica_dr)

In [None]:
# Step 4: Visualization Settings
import matplotlib as mpl

# Global matplotlib styling, applied in one call. It affects figures created
# after this cell runs.
mpl.rcParams.update({
    # Axes frame
    'axes.linewidth': 2,
    # Major ticks: length and width on both axes, drawn pointing inward
    'xtick.major.size': 4,
    'xtick.major.width': 2,
    'ytick.major.size': 4,
    'ytick.major.width': 2,
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    # Default text size
    'font.size': 14,
    # Default resolution for saved figures
    'savefig.dpi': 300,
})

In [None]:
# Step 5: Visualization
def _plot_cluster_scatter(x, y, labels, xlabel, ylabel, filename, axis_limits=None):
    """Draw a scatter plot coloured by cluster label, save it as PNG and show it.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points, one entry per sample.
    labels : array-like
        Per-sample cluster labels; mapped to colours through the 'tab20' colormap.
    xlabel, ylabel : str
        Axis labels.
    filename : str
        Path of the image to write (300 dpi, tight bounding box).
    axis_limits : tuple of (low, high), optional
        When given, the same limits are applied to both the x and the y axis.
        When None, matplotlib's automatic limits are kept.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, c=labels, alpha=1, s=1, cmap='tab20')
    plt.xlabel(xlabel, fontsize=16)
    plt.ylabel(ylabel, fontsize=16)
    if axis_limits is not None:
        plt.xlim(*axis_limits)
        plt.ylim(*axis_limits)

    # Colorbar styling: outline and tick weights plus tick-label size.
    cbar = plt.colorbar()
    cbar.outline.set_linewidth(1.5)
    cbar.ax.tick_params(width=1.5, length=4, labelsize=12)

    plt.savefig(filename, dpi=300, bbox_inches='tight')
    print(f"save: {filename}")
    plt.show()


# Shared axis range for the phi plots, which are labelled in degrees.
_PHI_LIMITS = (-180, 180)

# --- TICA space ---
_plot_cluster_scatter(tica_dr[:, 0], tica_dr[:, 1], labels_tica,
                      'dCV1', 'dCV2',
                      'tica_kmeans_reduced_space.png')

# --- phi1 vs phi2 ---
_plot_cluster_scatter(phi1, phi2, labels_tica,
                      'φ1 (degrees)', 'φ2 (degrees)',
                      'tica_kmeans_phi1_phi2.png',
                      axis_limits=_PHI_LIMITS)

# --- phi1 vs phi3 ---
_plot_cluster_scatter(phi1, phi3, labels_tica,
                      'φ1 (degrees)', 'φ3 (degrees)',
                      'tica_kmeans_phi1_phi3.png',
                      axis_limits=_PHI_LIMITS)

# --- phi2 vs phi3 ---
_plot_cluster_scatter(phi2, phi3, labels_tica,
                      'φ2 (degrees)', 'φ3 (degrees)',
                      'tica_kmeans_phi2_phi3.png',
                      axis_limits=_PHI_LIMITS)
