# Imports

In [None]:
from pathlib import Path
import json

import numpy as np
import pandas as pd
pd.options.display.max_columns = 512
import matplotlib.pyplot as plt

# Load data

In [None]:
# Read every JSON file under src/results into a list, one entry per file.
# The paths are sorted because `Path.glob` yields entries in an arbitrary,
# filesystem-dependent order; sorting keeps the row order of the frame built
# from `results` stable across machines and re-runs.
results = []
for elem in sorted(Path("src/results").glob("*.json")):
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(elem, "r", encoding="utf-8") as f:
        results.append(json.load(f))

In [None]:
# Flatten the loaded (possibly nested) records into a flat table; nested keys
# become dot-separated column names such as "evaluation.acc_test".
df = pd.json_normalize(data=results)
# Quick sanity check: (number of rows, number of columns).
df.shape

### Validate number of executions

In [None]:
# Count how many result rows exist for each (dataset, model, neurons)
# configuration, then average those counts into a single summary value.
(
    df
    .groupby(["dataset", "model", "neurons"])
    .size()
    .to_frame("executions")
    .mean()
)

In [None]:
# Mean test accuracy per configuration, reshaped so that each dataset is a row
# and each (model, neurons) pair is a column.
(
    df
    .groupby(["dataset", "model", "neurons"], as_index=False)[["evaluation.acc_test"]]
    .mean()
    .sort_values(["neurons", "model"])
    .pivot(index="dataset", columns=["model", "neurons"], values="evaluation.acc_test")
)

# Plot scatter of all executions

In [None]:
# One figure per (dataset, model) pair showing every individual execution:
# test accuracy in red and train accuracy in blue, against the number of
# neurons on a logarithmic x axis.
for (dataset, model), group in df.groupby(["dataset", "model"]):
    fig, ax = plt.subplots(figsize=(5, 2))
    group.plot.scatter(
        x="neurons", y="evaluation.acc_test",
        color="red", alpha=.5, ax=ax, label="test",
    )
    group.plot.scatter(
        x="neurons", y="evaluation.acc_train",
        color="blue", alpha=.5, ax=ax, label="train",
    )
    ax.set_xscale("log")
    # Both series share this axis; without an explicit label the second call
    # leaves the y axis reading "evaluation.acc_train" for both of them.
    ax.set_ylabel("accuracy")
    # Rebuild the legend from all labelled artists so both series are listed.
    ax.legend()
    # The group key is a (dataset, model) tuple; format it instead of showing
    # the raw tuple repr as the title.
    ax.set_title(f"{dataset} / {model}")
    plt.show()
    # Release the figure explicitly: this loop creates one per group.
    plt.close(fig)

In [None]:
# Per-row accuracy gap: test accuracy minus train accuracy (negative when the
# score on the training data is higher).
df["evaluation.acc_diff"] = df["evaluation.acc_test"].sub(df["evaluation.acc_train"])

In [None]:
# Distribution of the per-row accuracy gap (test - train), one panel per
# model. All panels use the same bin edges so they are directly comparable.
fig, ax = plt.subplots(3, 1, figsize=(4, 3), sharex=True, sharey=True, dpi=300)

# Common bin edges spanning the full range of the gap over every model
# (np.linspace defaults to 50 points, i.e. 49 bins).
bins = np.linspace(df["evaluation.acc_diff"].min(), df["evaluation.acc_diff"].max())

for panel, model in zip(ax, ["ELM", "ELMReg", "ELMPCA"]):
    panel.hist(
        df.loc[df["model"] == model, "evaluation.acc_diff"],
        # Each histogram carries its own model name as label.
        label=model, alpha=.7, bins=bins, color="black",
    )
    # The model name doubles as the panel's y label; the y ticks are hidden.
    panel.set_ylabel(model)
    panel.set_yticks([])

plt.tight_layout()
# Saved under its own name: a later cell of this notebook writes "hist.pdf"
# for the histogram of the averaged metrics and would overwrite this figure.
plt.savefig("hist_executions.pdf")

# Plot average metrics

In [None]:
# Average train accuracy, test accuracy and fit time over all rows of each
# (dataset, model, neurons) configuration, then derive the gap between the
# averaged test and train accuracies.
means = (
    df
    .groupby(["dataset", "model", "neurons"], as_index=False)[[
        "evaluation.acc_train",
        "evaluation.acc_test",
        "evaluation.time_to_fit",
    ]]
    .mean()
    .assign(**{
        "evaluation.acc_diff": lambda m: m["evaluation.acc_test"] - m["evaluation.acc_train"],
    })
)

In [None]:
# Mean accuracy gap (test - train) against the number of neurons: one panel
# per dataset, one marker fill per model, shared axes across panels.
fig, ax = plt.subplots(1, 3, figsize=(10, 2), dpi=300, sharex=True, sharey=True)

for i, dataset in enumerate(["synth_linear", "synth_blobs", "synth_moons"]):
    for (model, color) in [("ELM", "#000000"), ("ELMReg", "#777777"), ("ELMPCA", "#FFFFFF")]:
        aux = means[
            (means["dataset"] == dataset)
            & (means["model"] == model)
        ]

        # The black edge keeps the white ELMPCA markers visible.
        aux.plot.scatter(x="neurons", y="evaluation.acc_diff", color=color, edgecolors="black", ax=ax[i], s=40, label=model)

        # pandas attaches a legend on each call; drop it so that only the
        # first panel ends up with one (added after the loop).
        legend = ax[i].get_legend()
        if legend is not None:
            legend.remove()

    # Without a title the three panels cannot be told apart.
    ax[i].set_title(dataset)
    ax[i].grid(alpha=.3)

ax[0].legend()
# The x axis is shared, so setting the scale on one panel applies to all.
ax[0].set_xscale("log")
plt.tight_layout()
plt.savefig("metrics.pdf")

# Pivot metrics

In [None]:
# Emit the averaged metrics of the 1024-neuron rows as a LaTeX table,
# rounded to three decimals.
print(
    means
    .loc[
        means["neurons"].eq(1024),
        [
            "dataset", "model",
            "evaluation.acc_train", "evaluation.acc_test",
            "evaluation.acc_diff", "evaluation.time_to_fit",
        ],
    ]
    .round(3)
    .to_latex()
)

In [None]:
# Averaged metrics of the 32-neuron rows, rounded to three decimals.
(
    means
    .loc[
        means["neurons"].eq(32),
        [
            "dataset", "model",
            "evaluation.acc_train", "evaluation.acc_test",
            "evaluation.acc_diff", "evaluation.time_to_fit",
        ],
    ]
    .round(3)
)

In [None]:
# Distribution of the averaged accuracy gap (test - train), one panel per
# model, with the model name shown as a right-hand y label.
fig, ax = plt.subplots(3, 1, figsize=(4, 3), sharex=True, sharey=True, dpi=300)

# The first panel bins the ELM values into 50 bins; `hist` returns
# (counts, bin_edges, patches) and the edges are reused for the other panels.
# NOTE(review): the edges span the ELM range only, so values of the other
# models falling outside that range would not be drawn - confirm this is fine.
ret = ax[0].hist(
    means.loc[means["model"] == "ELM", "evaluation.acc_diff"],
    label="ELM", alpha=.7, bins=50, color="black",
)
for panel, model in zip(ax[1:], ["ELMReg", "ELMPCA"]):
    means.loc[means["model"] == model, "evaluation.acc_diff"].plot.hist(
        label=model, alpha=.7, bins=ret[1], ax=panel, color="black",
    )

for panel, model in zip(ax, ["ELM", "ELMReg", "ELMPCA"]):
    panel.yaxis.set_label_position("right")
    panel.set_ylabel(model)

plt.tight_layout()
plt.savefig("hist.pdf")

# Compare `ELMReg` with `ELMPCA`

In [None]:
# Mean test accuracy of every model side by side for each (dataset, neurons)
# pair. `values` is passed as a list, so the resulting columns form a
# two-level index (metric, model).
cmp = means.pivot(
    index=["dataset", "neurons"],
    columns="model",
    values=["evaluation.acc_test"],
)
cmp

---