In [None]:
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.datasets import (
    make_blobs,
    make_circles,
    make_classification,
    make_moons,
)
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old aliases. Use whichever spelling this
# installation actually provides instead of hard-coding one of them.
plt.style.use(
    [
        style
        if style in plt.style.available
        else style.replace("seaborn-", "seaborn-v0_8-", 1)
        for style in ("seaborn-white", "seaborn-paper")
    ]
)
# Three-colour default palette used by the figures in this notebook.
sns.set_palette(["#404040", "#f0b67f", "#57b6a2"])
# Applied after the style sheets so the seaborn "talk" context settings win.
sns.set_context("talk")

In [None]:
# Two-class toy data: two informative features, one cluster per class.
# random_state pins the sample so the figures are reproducible.
two_class_spec = dict(
    n_classes=2,
    n_clusters_per_class=1,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    scale=10,
    random_state=1,
)
X, y = make_classification(**two_class_spec)

In [None]:
# Plot the raw, unlabeled points and remember the automatically chosen axis
# limits; later cells reuse xlim/ylim so their figures share this view.
fig, ax = plt.subplots(figsize=(7.5, 5))
ax.scatter(*X.T, c="#404040", edgecolor="white")

xlim = ax.get_xlim()
ylim = ax.get_ylim()

ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_0.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Supervised reference: fit a logistic regression on the true labels and draw
# its decision boundary over the points coloured by class.
classifier = LogisticRegression()
classifier = classifier.fit(X, y)

class_cmap = mcolors.LinearSegmentedColormap.from_list(
    "", ["#f0b67f", "#57b6a2"],
)

fig, ax = plt.subplots(figsize=(7.5, 5))

decision_boundary = DecisionBoundaryDisplay.from_estimator(
    classifier,
    X,
    ax=ax,
    plot_method="contour",
    response_method="predict",
)
ax.scatter(*X.T, c=y, cmap=class_cmap, edgecolor="white")

# Restore the limits captured from the first scatter plot.
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_1.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Two-cluster k-means on the toy data; the labels y are not used here.
kmeans = KMeans(random_state=1, n_clusters=2)
kmeans = kmeans.fit(X)

In [None]:
# Unlabeled points in grey with the fitted cluster centres drawn as large
# coloured "X" markers.
fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c="#404040", edgecolor="white")

center_x, center_y = kmeans.cluster_centers_.T
ax.scatter(
    center_x,
    center_y,
    marker="X",
    s=200,
    c=["#f0b67f", "#57b6a2"],
    edgecolor="white",
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_2.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Same view as before, but with every point coloured by its k-means label.
cluster_colors = ["#f0b67f", "#57b6a2"]
cluster_cmap = mcolors.LinearSegmentedColormap.from_list("", cluster_colors)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c=kmeans.labels_, cmap=cluster_cmap, edgecolor="white")
ax.scatter(
    *kmeans.cluster_centers_.T,
    c=cluster_colors,
    marker="X",
    s=200,
    edgecolor="white",
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_3.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# First manual k-means step: start from two hand-picked centroids and allow
# only a single iteration (max_iter=1).
centroids = np.asarray([[-100, 0], [-100, 15]])
kmeans = KMeans(
    n_clusters=2, init=centroids, n_init=1, max_iter=1, random_state=1,
)
kmeans = kmeans.fit(X)

step_colors = ["#f0b67f", "#57b6a2"]
step_cmap = mcolors.LinearSegmentedColormap.from_list("", step_colors)
center_style = dict(s=200, edgecolor="white", marker="X")

# Figure 4a: grey points plus the centres obtained after that iteration.
fig, ax = plt.subplots(figsize=(7.5, 5))
ax.scatter(*X.T, c="#404040", edgecolor="white")
ax.scatter(*kmeans.cluster_centers_.T, c=step_colors, **center_style)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_4a.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

# Figure 4b: the same centres, with points coloured by their assigned label.
fig, ax = plt.subplots(figsize=(7.5, 5))
ax.scatter(*X.T, c=kmeans.labels_, cmap=step_cmap, edgecolor="white")
ax.scatter(*kmeans.cluster_centers_.T, c=step_colors, **center_style)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_4b.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Next manual k-means step: restart from the centres of the previous fit and
# run one more iteration, keeping the previous model for comparison.
kmeans_prev = kmeans
centroids = kmeans.cluster_centers_
kmeans = KMeans(
    n_clusters=2, init=centroids, n_init=1, max_iter=1, random_state=1,
)
kmeans = kmeans.fit(X)

step_colors = ["#f0b67f", "#57b6a2"]
step_cmap = mcolors.LinearSegmentedColormap.from_list("", step_colors)
center_style = dict(s=200, edgecolor="white", marker="X")

# Figure 5a: previous assignment, old and new centres, and a dashed line
# connecting each old centre to its updated position.
fig, ax = plt.subplots(figsize=(7.5, 5))
ax.scatter(*X.T, c=kmeans_prev.labels_, cmap=step_cmap, edgecolor="white")
ax.scatter(*centroids.T, c=step_colors, **center_style)
ax.scatter(*kmeans.cluster_centers_.T, c=step_colors, **center_style)
for old, new, shade in zip(centroids, kmeans.cluster_centers_, step_colors):
    ax.plot([old[0], new[0]], [old[1], new[1]], c=shade, ls="--")

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_5a.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

# Figure 5b: updated centres with the labels of the new fit.
fig, ax = plt.subplots(figsize=(7.5, 5))
ax.scatter(*X.T, c=kmeans.labels_, cmap=step_cmap, edgecolor="white")
ax.scatter(*kmeans.cluster_centers_.T, c=step_colors, **center_style)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_5b.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Another manual k-means step: restart from the centres of the previous fit
# and run one more iteration, keeping the previous model for comparison.
kmeans_prev = kmeans
centroids = kmeans.cluster_centers_
kmeans = KMeans(
    n_clusters=2, init=centroids, n_init=1, max_iter=1, random_state=1,
).fit(X)

step_colors = ["#f0b67f", "#57b6a2"]
step_cmap = mcolors.LinearSegmentedColormap.from_list("", step_colors)
center_style = dict(s=200, edgecolor="white", marker="X")

# Figure 6a: centre movement drawn over the PREVIOUS assignment, mirroring
# figure 5a. Colouring by kmeans_prev.labels_ (rather than the new labels)
# keeps the "a" figure showing the state before the update; the new labels
# appear in figure 6b.
fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(
    X[:, 0],
    X[:, 1],
    c=kmeans_prev.labels_,
    edgecolor="white",
    cmap=step_cmap,
)
# Old centres (start of this step) and new centres (after one iteration).
ax.scatter(centroids[:, 0], centroids[:, 1], c=step_colors, **center_style)
ax.scatter(
    kmeans.cluster_centers_[:, 0],
    kmeans.cluster_centers_[:, 1],
    c=step_colors,
    **center_style,
)
# Dashed line from each old centre to its updated position.
for old, new, shade in zip(centroids, kmeans.cluster_centers_, step_colors):
    ax.plot([old[0], new[0]], [old[1], new[1]], c=shade, ls="--")

ax.set_xlim(xlim)
ax.set_ylim(ylim)

ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")

plt.savefig("fig/cluster_6a.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

# Figure 6b: updated centres with the labels of the new fit.
fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(
    X[:, 0],
    X[:, 1],
    c=kmeans.labels_,
    edgecolor="white",
    cmap=step_cmap,
)
ax.scatter(
    kmeans.cluster_centers_[:, 0],
    kmeans.cluster_centers_[:, 1],
    c=step_colors,
    **center_style,
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)

ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")

plt.savefig("fig/cluster_6b.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Replace X, y with a three-class toy data set (one cluster per class) that
# the centroid-initialisation experiments below operate on.
three_class_spec = dict(
    n_classes=3,
    n_clusters_per_class=1,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    class_sep=2.0,
    scale=10,
    random_state=3,
)
X, y = make_classification(**three_class_spec)

In [None]:
# Initialisation experiment 7, part a: show the hand-picked starting
# centroids on top of the unlabeled three-class data.
centroids = np.asarray([[5, 0], [20, -5], [12.5, 5]])
kmeans = KMeans(
    n_clusters=3, init=centroids, n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c="#404040", edgecolor="white")
ax.scatter(
    *centroids.T,
    c=["#f0b67f", "#57b6a2", "#ee266d"],
    marker="X",
    s=200,
    edgecolor="white",
)

# xlim/ylim are the limits stored by an earlier cell of the notebook.
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_7a.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Initialisation experiment 7, part b: the clustering reached from those
# starting centroids, with points coloured by their final label.
centroids = np.asarray([[5, 0], [20, -5], [12.5, 5]])
kmeans = KMeans(
    n_clusters=3, init=centroids, n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

trio_colors = ["#f0b67f", "#57b6a2", "#ee266d"]
trio_cmap = mcolors.LinearSegmentedColormap.from_list("", trio_colors)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c=kmeans.labels_, cmap=trio_cmap, edgecolor="white")
ax.scatter(
    *kmeans.cluster_centers_.T,
    c=trio_colors,
    marker="X",
    s=200,
    edgecolor="white",
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_7b.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Initialisation experiment 8, part a: a second set of hand-picked starting
# centroids drawn over the unlabeled data.
centroids = np.asarray([[5, -20], [10, 20], [30, 20]])
kmeans = KMeans(
    n_clusters=3, init=centroids, n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c="#404040", edgecolor="white")
ax.scatter(
    *centroids.T,
    c=["#f0b67f", "#57b6a2", "#ee266d"],
    marker="X",
    s=200,
    edgecolor="white",
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_8a.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Initialisation experiment 8, part b: final labels and centres reached from
# the second set of starting centroids.
centroids = np.asarray([[5, -20], [10, 20], [30, 20]])
kmeans = KMeans(
    n_clusters=3, init=centroids, n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

trio_colors = ["#f0b67f", "#57b6a2", "#ee266d"]
trio_cmap = mcolors.LinearSegmentedColormap.from_list("", trio_colors)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c=kmeans.labels_, cmap=trio_cmap, edgecolor="white")
ax.scatter(
    *kmeans.cluster_centers_.T,
    c=trio_colors,
    marker="X",
    s=200,
    edgecolor="white",
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_8b.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Initialisation experiment 9, part a: a third set of hand-picked starting
# centroids drawn over the unlabeled data.
centroids = np.asarray([[40, -30], [45, -35], [20, 0]])
kmeans = KMeans(
    n_clusters=3, init=centroids, n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c="#404040", edgecolor="white")
ax.scatter(
    *centroids.T,
    c=["#f0b67f", "#57b6a2", "#ee266d"],
    marker="X",
    s=200,
    edgecolor="white",
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_9a.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Initialisation experiment 9, part b: final labels and centres reached from
# the third set of starting centroids.
centroids = np.asarray([[40, -30], [45, -35], [20, 0]])
kmeans = KMeans(
    n_clusters=3, init=centroids, n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

trio_colors = ["#f0b67f", "#57b6a2", "#ee266d"]
trio_cmap = mcolors.LinearSegmentedColormap.from_list("", trio_colors)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c=kmeans.labels_, cmap=trio_cmap, edgecolor="white")
ax.scatter(
    *kmeans.cluster_centers_.T,
    c=trio_colors,
    marker="X",
    s=200,
    edgecolor="white",
)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_9b.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Clustering from a random initialisation, with a faint line from every point
# to the centre of the cluster it was assigned to.
colors = ["#f0b67f", "#57b6a2", "#ee266d"]

kmeans = KMeans(
    n_clusters=3, init="random", n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

label_cmap = mcolors.LinearSegmentedColormap.from_list("", colors)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c=kmeans.labels_, cmap=label_cmap, edgecolor="white")
ax.scatter(
    *kmeans.cluster_centers_.T,
    c=colors,
    marker="X",
    s=200,
    edgecolor="white",
)

# zorder=0.5 keeps the connecting lines underneath the scatter markers.
for point, cluster in zip(X, kmeans.labels_):
    center = kmeans.cluster_centers_[cluster]
    ax.plot(
        [point[0], center[0]],
        [point[1], center[1]],
        zorder=0.5,
        alpha=0.2,
        c=colors[cluster],
    )

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_10a.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Same point-to-centre view, this time starting k-means from the hand-picked
# centroids instead of a random initialisation.
colors = ["#f0b67f", "#57b6a2", "#ee266d"]

centroids = np.asarray([[40, -30], [45, -35], [20, 0]])
kmeans = KMeans(
    n_clusters=3, init=centroids, n_init=1, max_iter=100, random_state=1,
)
kmeans = kmeans.fit(X)

label_cmap = mcolors.LinearSegmentedColormap.from_list("", colors)

fig, ax = plt.subplots(figsize=(7.5, 5))

ax.scatter(*X.T, c=kmeans.labels_, cmap=label_cmap, edgecolor="white")
ax.scatter(
    *kmeans.cluster_centers_.T,
    c=colors,
    marker="X",
    s=200,
    edgecolor="white",
)

# zorder=0.5 keeps the connecting lines underneath the scatter markers.
for point, cluster in zip(X, kmeans.labels_):
    center = kmeans.cluster_centers_[cluster]
    ax.plot(
        [point[0], center[0]],
        [point[1], center[1]],
        zorder=0.5,
        alpha=0.2,
        c=colors[cluster],
    )

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/cluster_10b.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Run k-means on four toy data sets (circles, moons, blobs of different
# spread, uniform noise) and show the resulting labels in a 2x2 grid.
#
# The generators come from the notebook's top-level sklearn.datasets import.
# Previously this cell imported the `datasets` module and then rebound the
# same name to the list below, shadowing the module.

# Seed NumPy's global RNG; the calls below that pass no random_state rely on it.
np.random.seed(0)

n_samples = 1000
noisy_circles = make_circles(n_samples=n_samples, factor=0.5, noise=0.05)
noisy_moons = make_moons(n_samples=n_samples, noise=0.05)
blobs_varied = make_blobs(
    n_samples=n_samples, cluster_std=[1.0, 2.5, 0.5], random_state=170,
)
# Uniform random points with no labels, stored as an (X, y) pair like the rest.
no_structure = np.random.rand(n_samples, 2), None

# Each entry pairs an (X, y) data set with the number of clusters to look for.
datasets = [
    (noisy_circles, 2),
    (noisy_moons, 2),
    (blobs_varied, 3),
    (no_structure, 3),
]

# Built once outside the loop; indexed with the predicted labels per panel.
colors = np.asarray(["#f0b67f", "#57b6a2", "#ee266d"])

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# NOTE: this loop rebinds the notebook-level names X, y and kmeans.
for ax, ((X, y), n_clusters) in zip(axes.flatten(), datasets):
    kmeans = KMeans(n_clusters=n_clusters).fit(X)
    y_pred = kmeans.labels_.astype(int)

    ax.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_pred])

    # Hide the ticks on every panel.
    ax.set_xticks(())
    ax.set_yticks(())

plt.tight_layout()

plt.savefig("fig/cluster_11.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Small ten-sample data set (two classes, two clusters per class) used for
# the hierarchical clustering figures.
small_spec = dict(
    n_samples=10,
    n_classes=2,
    n_clusters_per_class=2,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    class_sep=2.0,
    scale=10,
    random_state=1,
)
X, y = make_classification(**small_spec)

# from sklearn.preprocessing import MinMaxScaler

# X = MinMaxScaler((0, 10)).fit_transform(X)
# X

In [None]:
def plot_dendrogram(model, **kwargs):
    """Draw a dendrogram for a fitted hierarchical clustering model.

    Builds a linkage matrix from the model's ``children_``, ``distances_``
    and ``labels_`` attributes and hands it to
    ``scipy.cluster.hierarchy.dendrogram``.

    Parameters
    ----------
    model
        Fitted estimator exposing ``children_``, ``distances_`` and
        ``labels_``.
    **kwargs
        Forwarded unchanged to ``dendrogram``.

    Returns
    -------
    None
    """
    merges = model.children_
    n_leaves = len(model.labels_)

    # sizes[k] is the number of original samples below the k-th merge. A
    # child index below n_leaves is a single sample; anything else refers to
    # an earlier merge whose size has already been computed.
    sizes = np.zeros(merges.shape[0])
    for row, pair in enumerate(merges):
        sizes[row] = sum(
            1 if node < n_leaves else sizes[node - n_leaves] for node in pair
        )

    # Column layout passed to dendrogram: child a, child b, distance, size.
    linkage = np.column_stack((merges, model.distances_, sizes)).astype(float)
    dendrogram(linkage, **kwargs)
    
    
# Fit agglomerative clustering with distance_threshold=0 and no fixed number
# of clusters, then draw its dendrogram without ticks or axis spines.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=0).fit(X)

fig, ax = plt.subplots(figsize=(7.5, 5))

plot_dendrogram(model, ax=ax, truncate_mode="level", p=5, color_threshold=0)

ax.set(xticks=[], yticks=[])
sns.despine(bottom=True, left=True)

plt.savefig("fig/dendrogram_0.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Scatter the small data set and re-capture the axis limits, replacing the
# xlim/ylim stored for the earlier k-means figures.
fig, ax = plt.subplots(figsize=(7.5, 5))
ax.scatter(*X.T, c="#404040", edgecolor="white")

xlim = ax.get_xlim()
ylim = ax.get_ylim()

ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/hclust_0.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()

In [None]:
# Draw every sample twice: a large coloured disc (s=300) first, then the
# regular grey marker on top of it.
fig, ax = plt.subplots(figsize=(7.5, 5))

feature_1, feature_2 = X.T
ax.scatter(feature_1, feature_2, s=300, c="#f0b67f", edgecolor="white")
ax.scatter(feature_1, feature_2, c="#404040", edgecolor="white")

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set(xlabel="Feature 1", ylabel="Feature 2")

plt.savefig("fig/hclust_1.png", dpi=300, bbox_inches="tight")
plt.show()
plt.close()