In [21]:
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_moons
import numpy as np

In [22]:
# Generate crescent moons
X, _ = make_moons(n_samples=400, noise=0.05, random_state=0)

# Far outliers
far_outliers = np.random.uniform(low=-2, high=3, size=(30, 2))

# Partial outliers (near clusters)
partial_outliers = np.random.normal(loc=[1.5, 1.0], scale=0.3, size=(30, 2))

# Combine all points
X_all = np.vstack([X, far_outliers, partial_outliers])

In [23]:
from numpy import unique
from sklearn.metrics import silhouette_score

In [None]:
def objective(trial):

    model = DBSCAN(eps=0.2, min_samples=5)

    model.fit(X_all)

    actual_clusters = model.labels_

    # if desired for viewing
    dbscan_outliers_indices = []

    for index, sample_label in enumerate(model.labels_):
        if sample_label == -1:
            while sample_label in actual_clusters:
                sample_label += 1 #UPDATING THE NEW label
            actual_clusters[index] = sample_label #UPDATING THE ACTUAL CLUSTERS
            dbscan_outliers_indices.append(index)

    score = silhouette_score(X_all, actual_clusters)

    outlier_frame = X_all[dbscan_outliers_indices]

    return score

In [29]:
import optuna


study = optuna.create_study()


study.optimize(objective, n_trials = 3)

[I 2025-09-30 19:40:44,989] A new study created in memory with name: no-name-f3feece5-d1e4-4a73-82c7-fa4567c3fd9a


[I 2025-09-30 19:40:45,089] Trial 0 finished with value: -0.33151046534897993 and parameters: {}. Best is trial 0 with value: -0.33151046534897993.
[I 2025-09-30 19:40:45,100] Trial 1 finished with value: -0.33151046534897993 and parameters: {}. Best is trial 0 with value: -0.33151046534897993.
[I 2025-09-30 19:40:45,107] Trial 2 finished with value: -0.33151046534897993 and parameters: {}. Best is trial 0 with value: -0.33151046534897993.
