In [9]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import silhouette_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer

In [88]:
# Hyper-parameter grid for the DBSCAN search.
# eps is built from an integer range and then scaled: a float step in
# np.arange accumulates rounding error (e.g. 0.30000000000000004) and makes
# the number of elements fragile. This yields exactly 0.1, 0.2, ..., 0.9.
eps_values = np.arange(1, 10) / 10
min_samples_values = [3, 5, 7, 10]

In [92]:
# Running best of the grid search: the best silhouette score seen so far and
# the (eps, min_samples) pair that produced it. -1 is the lower bound of the
# silhouette coefficient; None means "no valid clustering found yet".
best_score, best_eps, best_min_samples = -1, None, None

In [93]:
# Load the dataset and split its columns by header suffix: names ending in
# '+', '-' or '!' go to y, every other column becomes a clustering feature.
# NOTE(review): the suffixes presumably mark objective/target columns;
# confirm against the dataset's documentation.
data = pd.read_csv('../../proj/data/SS-N.csv')

y_columns = [col for col in data.columns if col.endswith(('+', '-', '!'))]
X_columns = [col for col in data.columns if not col.endswith(('+', '-', '!'))]

y = data[y_columns]

# Min-max scale the features first, then replace any missing values with the
# column mean. X ends up as a plain NumPy array.
scaler = MinMaxScaler()
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(scaler.fit_transform(data[X_columns]))

In [94]:
# Grid-search DBSCAN over every (eps, min_samples) pair and keep the pair
# with the highest silhouette score.
#
# The running best is reset here so that re-running this cell on its own
# never compares against a stale result left over from an earlier run.
best_score = -1
best_eps = None
best_min_samples = None

for eps_val in eps_values:
    for min_samples_val in min_samples_values:
        # n_jobs=-1 only parallelises the neighbour search; labels are unchanged.
        dbscan = DBSCAN(eps=eps_val, min_samples=min_samples_val, n_jobs=-1)
        labels = dbscan.fit_predict(X)

        # DBSCAN marks noise points with the label -1. Drop them so noise is
        # not scored as if it were a cluster of its own.
        # NOTE(review): scoring only clustered points can favour settings that
        # discard many points as noise; consider also checking the noise share.
        clustered = labels != -1
        n_clustered = int(clustered.sum())
        n_clusters = len(set(labels[clustered]))

        # silhouette_score requires 2 <= n_clusters <= n_samples - 1.
        if not 2 <= n_clusters < n_clustered:
            continue

        # The silhouette is computed from pairwise distances, so it is the
        # slow step on large data; pass sample_size=... and random_state=...
        # for a subsampled estimate if this cell takes too long.
        score = silhouette_score(X[clustered], labels[clustered])

        if score > best_score:
            best_score = score
            best_eps = eps_val
            best_min_samples = min_samples_val

print(f"Best silhouette score: {best_score} with eps={best_eps} and min_samples={best_min_samples}")

KeyboardInterrupt: 