In [None]:
import pandas as pd
import numpy as np

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Apply matplotlib's "grayscale" style sheet to the figures created below.
plt.style.use("grayscale")

In [None]:
from sklearn.datasets import make_circles

# Circles Example

In [None]:
# Draw 500 noisy samples from two concentric circles (inner circle scaled by
# factor=.5, Gaussian noise with std .05). The seed is pinned so that the
# sample, and the figure saved from it, is identical on every fresh run.
data, target = make_circles(n_samples=500, noise=.05, factor=.5, random_state=42)

In [None]:
# One row with two panels: the raw features on the left and the squared
# features on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(5, 2), dpi=300)

# c0 holds the samples whose label is positive, c1 all remaining samples.
positive = target > 0
c0 = data[positive]
c1 = data[~positive]

marker_kws = dict(alpha=.3, s=10)
for panel, power in zip(ax, (1, 2)):
    # Same two point clouds on each panel; only the exponent applied to the
    # coordinates differs (1 = untouched, 2 = squared).
    for samples, edge in ((c0, "black"), (c1, "white")):
        panel.scatter(
            samples[:, 0] ** power,
            samples[:, 1] ** power,
            edgecolors=edge,
            **marker_kws,
        )
    # Hide the tick marks on both axes of the panel.
    panel.set_xticks([])
    panel.set_yticks([])

ax[0].set(xlabel="$X_1$", ylabel="$X_2$")
ax[1].set(xlabel="$X_1^2$", ylabel="$X_2^2$")

fig.tight_layout()
fig.savefig("circles.pdf")

# Results

In [None]:
# Read the results table and discard the first column of the file
# (presumably a saved row index; confirm against results.csv).
df = (
    pd.read_csv("results.csv")
    .iloc[:, 1:]
)
df.head(n=5)

In [None]:
# Absolute gap between test accuracy and train accuracy for every row.
df["acc_diff"] = df["acc_test"].sub(df["acc_train"]).abs()

In [None]:
# Show the distinct values of the "dataset" column.
df["dataset"].unique()

In [None]:
# Show the column labels of the results table (including the added "acc_diff").
df.columns

In [None]:
# best_loss against alpha with a fitted regression line and no confidence
# band, drawn as one small facet per dataset, four facets per row.
sns.lmplot(
    x="alpha",
    y="best_loss",
    col="dataset",
    data=df,
    height=2,
    col_wrap=4,
    ci=None,
    palette="muted",
    scatter_kws=dict(s=10, alpha=.3),
)
# Switch the x axis of the current axes to a logarithmic scale.
plt.gca().set_xscale("log")

In [None]:
# gg_border_perc against alpha with a fitted regression line and no
# confidence band, drawn as one small facet per dataset, four facets per row.
sns.lmplot(
    x="alpha",
    y="gg_border_perc",
    col="dataset",
    data=df,
    height=2,
    col_wrap=4,
    ci=None,
    palette="muted",
    scatter_kws=dict(s=10, alpha=.3),
)
# Switch the x axis of the current axes to a logarithmic scale.
plt.gca().set_xscale("log")

In [None]:
# Columns of the results table whose correlation with alpha is tabulated
# in the cells that follow.
metrics = [
    "silhouette_score",
    "sil_neg_samples_score",
    "calinski_harabasz_score",
    "davies_bouldin_score",
    "gg_neigh_index",
    "gg_border_perc",
]

In [None]:
# For every dataset, correlate each metric with alpha using only that
# dataset's rows, then stack the per-dataset tables into one long frame with
# the columns "metrics", "alpha" and "dataset".
aux = []
for cdt in df["dataset"].unique():
    subset = df.loc[df["dataset"] == cdt]
    # Full numeric correlation matrix, narrowed to the metric rows and the
    # single "alpha" column.
    alpha_corr = subset.corr(numeric_only=True).loc[metrics, ["alpha"]]
    # Turn the metric names from the index into a regular "metrics" column.
    tidy = alpha_corr.reset_index().rename(columns={"index": "metrics"})
    aux.append(tidy.assign(dataset=cdt))
corrs = pd.concat(aux)

In [None]:
# Reshape the long correlation table to wide form: one row per metric, one
# column per dataset, each cell holding that metric's correlation with alpha.
corrs.pivot(columns="dataset", index="metrics", values="alpha")

In [None]:
# Correlation of each metric with alpha over the whole table, with all
# datasets pooled together.
df.corr(numeric_only=True).loc[metrics, ["alpha"]]

---