## Experiment #1
Speed comparison with random search

In [None]:
from skopt import dummy_minimize
from autocluster import AutoCluster
from algorithms import algorithms

In [None]:
import time
import warnings

import numpy as np
import matplotlib.pyplot as plt

from sklearn import cluster, datasets, mixture
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice

# Seed NumPy's global random number generator so that draws made through
# np.random (e.g. the np.random.rand call that builds `no_structure`) are
# reproducible from run to run.
np.random.seed(0)

In [None]:
# ============
# Generate datasets. We choose the size big enough to see the scalability
# of the algorithms, but not too big to avoid too long running times.
#
# NOTE(review): make_circles and make_moons are called without a
# random_state, and np.random.rand uses NumPy's global RNG, so the generated
# points presumably depend on the global seed and on the order of these
# statements -- avoid reordering them if reproducibility matters.
# ============
n_samples = 1500
noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                      noise=.05)
noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
# Uniform random 2-D points paired with None in place of labels.
no_structure = np.random.rand(n_samples, 2), None

# Anisotropically distributed data: blobs pushed through a fixed 2x2 linear
# transformation.
random_state = 170
X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
transformation = [[0.6, -0.6], [-0.4, 0.8]]
X_aniso = np.dot(X, transformation)
aniso = (X_aniso, y)

# Blobs with varied variances (one cluster_std value per blob).
varied = datasets.make_blobs(n_samples=n_samples,
                             cluster_std=[1.0, 2.5, 0.5],
                             random_state=random_state)

In [None]:
def visualize_data(data):
    """Show a scatter plot of the first two columns of ``data``.

    ``data`` is indexed as ``data[:, 0]`` (x-axis) and ``data[:, 1]``
    (y-axis); the figure is displayed immediately with ``plt.show()``.
    """
    x_coords, y_coords = data[:, 0], data[:, 1]
    plt.scatter(x_coords, y_coords)
    plt.show()

In [None]:
visualize_data(X)
visualize_data(X_aniso)
visualize_data(varied[0])
visualize_data(noisy_circles[0])
visualize_data(noisy_moons[0])

In [None]:
def fit_model(cfg):
    """Build and fit the clustering model described by ``cfg``.

    ``cfg`` is a mapping-like configuration: it is iterated over its keys and
    indexed by key. Its ``"algorithm_choice"`` entry selects the algorithm via
    ``Mapper.getClass``; every other entry whose value is not ``None`` is
    renamed with ``StringUtils.decode_parameter`` and passed as a keyword
    argument to ``algorithm.model``.

    The model is fitted on the module-level ``scaled_data``.

    NOTE(review): ``Mapper``, ``StringUtils`` and ``scaled_data`` are not
    defined in the cells visible here -- they must be in scope before this
    function is called.

    Returns:
        The fitted model instance.

    Raises:
        KeyError: if ``cfg`` has no non-None ``"algorithm_choice"`` entry.
    """
    # Convert cfg into a plain dictionary.
    raw_params = {key: cfg[key] for key in cfg}

    # Drop unset parameters only. The test is ``is not None`` rather than
    # truthiness so that legitimate falsy values (0, 0.0, False) reach the
    # model instead of being silently replaced by the model's defaults.
    params = {key: value for key, value in raw_params.items()
              if value is not None}

    # Look up the chosen algorithm and remove its key from the parameters,
    # so that only model hyperparameters remain.
    algorithm = Mapper.getClass(params.pop("algorithm_choice"))

    # Decode the encoded parameter names for this algorithm.
    decoded_params = {
        StringUtils.decode_parameter(key, algorithm.name): value
        for key, value in params.items()
    }

    # Build and fit the model, then hand it back so callers can read its
    # labels or call predict().
    model = algorithm.model(**decoded_params)
    model.fit(scaled_data)
    return model

def evaluate_model(cfg):
    """Return a loss for configuration ``cfg`` (lower is better).

    The model is obtained from ``fit_model(cfg)``, which must return the
    fitted estimator. Cluster labels are read from the model's ``labels_``
    attribute when it has one, otherwise from ``predict(scaled_data)``.

    NOTE(review): ``scaled_data`` and ``metrics`` are not defined or imported
    in the cells visible here (``metrics`` is presumably ``sklearn.metrics``)
    -- confirm they are in scope.

    Returns:
        ``1`` when every sample received the same label, otherwise the
        negated Euclidean silhouette score of the labelling on
        ``scaled_data``.
    """
    candidate_model = fit_model(cfg)

    if hasattr(candidate_model, 'labels_'):
        # Use the builtin ``int``: the ``np.int`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24; it referred to the same type.
        y_pred = candidate_model.labels_.astype(int)
    else:
        y_pred = candidate_model.predict(scaled_data)

    # With a single label, a fixed penalty of 1 is returned instead of
    # computing the silhouette score.
    if len(set(y_pred)) == 1:
        return 1

    # Negate the score so that a higher silhouette gives a lower loss.
    return -1 * metrics.silhouette_score(scaled_data, y_pred, metric='euclidean')