In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.metrics import (
    calinski_harabasz_score,
    davies_bouldin_score,
    silhouette_samples,
    silhouette_score,
)
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the source workbook; the second column (position 1) holds the single
# feature that is clustered below.
data = pd.read_excel(r"E:\Energy.xlsx")

# scikit-learn estimators expect a 2-D (n_samples, n_features) array, so the
# selected column is reshaped into a one-feature matrix.
feature_values = data.iloc[:, 1].values
X = feature_values.reshape(-1, 1)

In [None]:
# Standardise the feature before clustering; the fitted scaler is kept in
# `scaler` so the same transform can be reapplied later.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt

# Sweep candidate cluster counts (2..10) and record the mean silhouette of
# each KMeans partition of the standardised feature.
K_range = range(2, 11)
sil_scores = []

for k in K_range:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42).fit(X_scaled)
    labels = kmeans.labels_
    sil_scores.append(silhouette_score(X_scaled, labels))

# Silhouette score as a function of k.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(K_range, sil_scores, marker='o', linestyle='--', color='orange')
ax.set_title('Silhouette Score for Optimal k')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('Silhouette Score')
ax.grid(True)
plt.show()

In [None]:
# Tabulate the silhouette score recorded for each candidate k.
print("Cluster number (k) \t Profile coefficient")
for k, score in zip(K_range, sil_scores):
    print("{}\t\t{:.4f}".format(k, score))

In [None]:
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score, silhouette_score

# Repeat the KMeans sweep, this time collecting three internal validity
# indices (silhouette, Calinski-Harabasz, Davies-Bouldin) for every k.
K_range = range(2, 11)
sil_scores, ch_scores, db_scores = [], [], []

for k in K_range:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42).fit(X_scaled)
    # After the loop `labels` holds the partition of the last k tried (k=10).
    labels = kmeans.labels_
    for metric, bucket in (
        (silhouette_score, sil_scores),
        (calinski_harabasz_score, ch_scores),
        (davies_bouldin_score, db_scores),
    ):
        bucket.append(metric(X_scaled, labels))

# The first header label is Chinese for "number of clusters (k)".
print("簇数(k) | Silhouette | Calinski-Harabasz | Davies-Bouldin")
for k, sil, ch, db in zip(K_range, sil_scores, ch_scores, db_scores):
    print(f"{k:<8} | {sil:.4f}     | {ch:.2f}             | {db:.4f}")

In [None]:
from sklearn.metrics import silhouette_score

# `labels` is not assigned in this cell: it is whatever the most recently
# executed fit left behind (on a top-to-bottom run, the k=10 KMeans
# partition from the sweep above).
silhouette_avg = silhouette_score(X_scaled, labels)
print("Silhouette Score：{:.4f}".format(silhouette_avg))

In [None]:
# --- Spectral clustering fit -------------------------------------------------
# Fit the spectral model in this cell so that `labels` and `n_clusters` are
# defined here instead of being inherited from earlier cells (where `labels`
# is the leftover KMeans partition and `n_clusters` does not exist).
# NOTE(review): 8 clusters and the default estimator settings are assumptions;
# confirm them against the run that produced the reported results.
n_clusters = 8
spectral = SpectralClustering(n_clusters=n_clusters, random_state=42)
labels = spectral.fit_predict(X_scaled)

# --- Silhouette plot ---------------------------------------------------------
silhouette_vals = silhouette_samples(X_scaled, labels)
avg_score = silhouette_score(X_scaled, labels)

fig, ax = plt.subplots(figsize=(10, 6))
colors = plt.get_cmap("Set3")(np.linspace(0, 1, n_clusters))
bar_height = 0.8

for i in range(n_clusters):
    # Only strictly positive coefficients are drawn; zero/negative values are
    # left out of the figure (the x-axis is limited to [0, 1] below).
    cluster_vals = np.sort(silhouette_vals[(labels == i) & (silhouette_vals > 0)])
    if cluster_vals.size == 0:
        continue

    # Spread the sorted values over a band of height `bar_height` centred on
    # the cluster index, so each cluster gets its own horizontal strip.
    y = np.linspace(i - bar_height / 2, i + bar_height / 2, cluster_vals.size)
    ax.fill_betweenx(y,
                     0, cluster_vals,
                     facecolor=colors[i],
                     edgecolor=colors[i],
                     alpha=0.6)

# Dashed red line marks the mean silhouette over all samples.
ax.axvline(x=avg_score, color="red", linestyle="--")

ax.set_title("Silhouette Plot of Spectral Clustering", fontsize=14)
ax.set_xlabel("Silhouette Coefficient", fontsize=12)
ax.set_ylabel("Cluster Label", fontsize=12)
ax.set_yticks(range(n_clusters))
ax.set_yticklabels([f"Cluster {i}" for i in range(n_clusters)])
ax.set_xlim(0, 1)
ax.set_ylim(-1, n_clusters)
ax.grid(True, axis='x', linestyle='--', alpha=0.2)
fig.tight_layout()
fig.savefig(r"E:\Contour analysis diagram.png", dpi=600)
plt.show()

# --- Internal validity indices for the spectral partition --------------------
sil = avg_score  # already computed above for the same (X_scaled, labels)
ch = calinski_harabasz_score(X_scaled, labels)
db = davies_bouldin_score(X_scaled, labels)
print(f"Spectral Cluster evaluation：Silhouette={sil:.4f} | CH={ch:.2f} | DBI={db:.4f}")

# --- Attach assignments and export -------------------------------------------
data['Cluster'] = labels

# List the first-column identifiers of the samples that fell in each cluster.
for cluster in range(n_clusters):
    print(f"\n The molecular sequence numbers of LCMs in cluster {cluster} :")
    cluster_LCMs = data[data['Cluster'] == cluster].iloc[:, 0].values
    print(cluster_LCMs)

output_path = r"E:\Clustering_Results.xlsx"
data.to_excel(output_path, index=False)

In [None]:
# Reload the per-sample cluster assignments from the exported workbook.
file_path = r"E:\Clustering_Results.xlsx"
data = pd.read_excel(file_path)

# Mean of the feature column (position 1) within each cluster.
cluster_avg = data.groupby('Cluster')[data.columns[1]].mean().reset_index()
cluster_avg.columns = ['Cluster', 'Average_Toxicity']

# Order clusters from lowest to highest mean.
cluster_avg = cluster_avg.sort_values(by='Average_Toxicity', ascending=True)

# Lowest mean receives the highest score, highest mean receives 1. The top
# score equals the number of clusters actually present, so the assignment
# cannot fail with a length mismatch when that number is not 8.
cluster_avg['Score'] = range(len(cluster_avg), 0, -1)

# Propagate each cluster's score to its member rows.
data = data.merge(cluster_avg[['Cluster', 'Score']], on='Cluster', how='left')

output_path = r"E:\Clustering_Results_with_Scores.xlsx"
data.to_excel(output_path, index=False)

In [None]:
# Band the cluster-level means into 3 coarser groups with KMeans.
# n_init is pinned to 10, matching the other KMeans calls in this notebook;
# the library default differs between scikit-learn versions, which would make
# the grouping depend on the installed version.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_avg['Group'] = kmeans.fit_predict(cluster_avg[['Average_Toxicity']])

# The validity indices are computed on a standardised copy of the same single
# feature that the model was fitted on (the fit itself uses the raw values).
X_cluster_avg_scaled = StandardScaler().fit_transform(cluster_avg[['Average_Toxicity']])
group_labels = cluster_avg['Group'].values

sil = silhouette_score(X_cluster_avg_scaled, group_labels)
ch = calinski_harabasz_score(X_cluster_avg_scaled, group_labels)
db = davies_bouldin_score(X_cluster_avg_scaled, group_labels)

print(f"KMeans：Silhouette={sil:.4f} | CH={ch:.2f} | DBI={db:.4f}")

In [None]:
# Attach each sample's coarse group through its cluster id. Any 'Group'
# column left by a previous run of this cell is dropped first; otherwise a
# re-run would merge it a second time and yield 'Group_x'/'Group_y' columns.
data = (
    data.drop(columns='Group', errors='ignore')
        .merge(cluster_avg[['Cluster', 'Group']], on='Cluster', how='left')
)

output_path_kmeans = r"E:\Clustering_Results_with_Groups_KMeans.xlsx"
data.to_excel(output_path_kmeans, index=False)