In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import OPTICS
from metrics.internal_metrics import compute_all_metrics

In [6]:
# Load the preprocessed feature table; the first CSV column is used as the
# row index rather than as a feature column.
DATASET_PATH = 'dataset/Dortmund_features_preprocessed.csv'
data = pd.read_csv(DATASET_PATH, index_col=0)

# Split the frame into its row labels and the plain value matrix that the
# following cells operate on.
subject_ids = data.index.values
features = data.values

# Quick sanity report on what was loaded.
n_subjects = len(subject_ids)
n_features = features.shape[1]
print(f"Data shape: {features.shape}")
print(f"Number of subjects: {n_subjects}")
print(f"Number of features: {n_features}")

Data shape: (600, 100)
Number of subjects: 600
Number of features: 100


In [7]:
# OPTICS hyperparameters, gathered in one place so they are easy to tune.
optics_params = {"min_samples": 5, "xi": 0.05, "min_cluster_size": 0.05}

# fit() returns the fitted estimator itself, so construction and fitting
# can be chained.
clustering = OPTICS(**optics_params).fit(features)

# One label per row of `features`; -1 marks points treated as noise.
labels = clustering.labels_

In [8]:
# Distinct label values produced by the clustering (may include -1 = noise).
unique_labels = set(labels)

# Noise points are the ones labelled -1; count them in a vectorized way.
n_noise = int(np.count_nonzero(labels == -1))

# Real clusters are every distinct label except the noise marker.
n_clusters = len(unique_labels - {-1})

print(f"Numero di cluster trovati (escluso noise): {n_clusters}")
print(f"Numero di punti marcati come noise: {n_noise}")



Numero di cluster trovati (escluso noise): 1
Numero di punti marcati come noise: 0


In [None]:
# Evaluate clustering quality on the points that were actually assigned to a
# cluster: rows labelled -1 (noise) are dropped before scoring.
mask_inliers = labels != -1
features_inliers = features[mask_inliers]
labels_inliers = labels[mask_inliers]

# Silhouette, Davies-Bouldin and Calinski-Harabasz compare clusters with each
# other, so they are only defined for at least 2 clusters and fewer clusters
# than points (scikit-learn raises ValueError otherwise). The count is derived
# here from the inlier labels so this cell does not rely on earlier summaries.
# NOTE(review): compute_all_metrics is presumably built on the scikit-learn
# scorers (its result keys match their names) - confirm in metrics/internal_metrics.
n_clusters_inliers = np.unique(labels_inliers).size

if 2 <= n_clusters_inliers < labels_inliers.size:
    metrics_optics = compute_all_metrics(features_inliers, labels_inliers, include_dunn=False)

    print(f"Silhouette (solo inliers): {metrics_optics['silhouette_score']:.4f}")
    print(f"Davies-Bouldin (solo inliers): {metrics_optics['davies_bouldin_score']:.4f}")
    print(f"Calinski-Harabasz (solo inliers): {metrics_optics['calinski_harabasz_score']:.2f}")
else:
    # Keep the name defined so later cells can test for "no metrics available".
    metrics_optics = None
    print(
        "Metriche interne non calcolabili: "
        f"{n_clusters_inliers} cluster su {labels_inliers.size} punti inlier "
        "(servono almeno 2 cluster e meno cluster che punti)."
    )