# Combine the performance data from individual models

Loop over models and extract focal performance to a single table.

In [None]:
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

Loop and get the data.

In [None]:
# Gather one record of stored metrics for every combination of dimensionality
# reduction, model type and cluster label. Cluster 2 is not in the list below,
# so no model is loaded for it.
perf = []

for reduction in ["pca", "fa", "umap_dim20_nb5_euclidean"]:
    for model_type in ["lr", "rf"]:
        for cluster in [1, 3, 4, 5, 6, 7, 8]:
            model_path = f"/data/uscuni-restricted/06_models/{reduction}/label_{cluster}/{model_type}/model.joblib"
            with open(model_path, "rb") as f:
                model = joblib.load(f)

            # Only balanced accuracy is multiplied by 100; the remaining
            # metrics are stored exactly as the model reports them.
            # NOTE(review): the "f1_micro_" key carries a trailing underscore
            # unlike its siblings - kept as is because it defines the column
            # name written to the output table.
            perf.append(
                pd.Series(
                    dict(
                        reduction=reduction,
                        model=model_type,
                        cluster=cluster,
                        accuracy=model.score_,
                        balanced_accuracy=model.balanced_accuracy_ * 100,
                        precision=model.precision_,
                        recall=model.recall_,
                        f1_macro=model.f1_macro_,
                        f1_micro_=model.f1_micro_,
                        f1_weighted=model.f1_weighted_,
                    )
                )
            )

Combine.

In [None]:
# Stack the per-model records into one table, one row per model.
performance = pd.DataFrame(data=perf)

In [None]:
# Show the combined table as the cell output for a quick visual check.
performance

In [None]:
# Persist the combined performance table next to the model directories.
performance.to_parquet(
    path="/data/uscuni-restricted/06_models/model_performance.parquet"
)

In [None]:
# Balanced accuracy per cluster: colour encodes the model type, marker style
# encodes the dimensionality reduction.
ax = sns.scatterplot(
    data=performance,
    x="cluster",
    y="balanced_accuracy",
    hue="model",
    style="reduction",
)
# Place the legend outside the axes (anchored at the top-right corner) so it
# does not cover the points.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
sns.despine()

In [None]:
# Macro-averaged F1 per cluster: colour encodes the model type, marker style
# encodes the dimensionality reduction.
ax = sns.scatterplot(
    data=performance,
    x="cluster",
    y="f1_macro",
    hue="model",
    style="reduction",
)
# Place the legend outside the axes (anchored at the top-right corner) so it
# does not cover the points.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
sns.despine()

In [None]:
# Accuracy per cluster: colour encodes the model type, marker style encodes
# the dimensionality reduction.
ax = sns.scatterplot(
    data=performance,
    x="cluster",
    y="accuracy",
    hue="model",
    style="reduction",
)
# Place the legend outside the axes (anchored at the top-right corner) so it
# does not cover the points.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
sns.despine()

In [None]:
# Precision per cluster: colour encodes the model type, marker style encodes
# the dimensionality reduction.
ax = sns.scatterplot(
    data=performance,
    x="cluster",
    y="precision",
    hue="model",
    style="reduction",
)
# Place the legend outside the axes (anchored at the top-right corner) so it
# does not cover the points.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
sns.despine()

In [None]:
# Recall per cluster: colour encodes the model type, marker style encodes the
# dimensionality reduction.
ax = sns.scatterplot(
    data=performance,
    x="cluster",
    y="recall",
    hue="model",
    style="reduction",
)
# Place the legend outside the axes (anchored at the top-right corner) so it
# does not cover the points.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
sns.despine()

In [None]:
# Load the previously saved performance table to compare against.
old_perf = pd.read_parquet(
    path="/data/uscuni-restricted/06_models/model_performance_old.parquet"
)

In [None]:
# Difference between the current and the old metrics, aligned on
# (reduction, model, cluster). The UMAP identifier is renamed on the fly so
# it lines up with the name replaced here; the rename is not stored back
# into `performance`.
# NOTE(review): the colour scale is clipped to [-1, 1] while
# balanced_accuracy is stored multiplied by 100 - confirm the old table uses
# the same scale, otherwise that column will saturate.
(
    performance.replace(
        {"umap_dim20_nb5_euclidean": "umap_dim5_nb20_euclidean"}
    ).set_index(["reduction", "model", "cluster"])
    - old_perf.set_index(["reduction", "model", "cluster"])
).style.background_gradient(cmap="RdBu", vmin=-1, vmax=1)

In [None]:
# Print macro F1 as a markdown table: one row per (reduction, model), one
# column per cluster, rounded to three decimals.
print(
    performance.set_index(["reduction", "model", "cluster"])["f1_macro"]
    .unstack(level=-1)
    .round(decimals=3)
    .to_markdown()
)

In [None]:
# Swap the internal identifiers for human-readable labels used in the figure
# legends below.
# The UMAP key has to match the identifier the metrics were loaded under
# ("umap_dim20_nb5_euclidean"); previously only "umap_dim5_nb20_euclidean"
# was listed, which never occurs in `performance`, so UMAP rows kept their raw
# identifier. The older spelling is kept too so that a table using it is
# relabelled as well.
performance = performance.replace(
    {
        "pca": "PCA",
        "fa": "Factor Analysis",
        "umap_dim20_nb5_euclidean": "UMAP",
        "umap_dim5_nb20_euclidean": "UMAP",
        "lr": "Logistic Regression",
        "rf": "Random Forest",
    }
)

In [None]:
# Final figure: balanced accuracy per cluster. Colour encodes the model type,
# marker style encodes the dimensionality reduction.
ax = sns.scatterplot(
    data=performance, x="cluster", y="balanced_accuracy", hue="model", style="reduction"
)
# Keep the legend outside the axes so it does not cover the points.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
plt.xlabel("Cluster label")
plt.ylabel("Balanced Accuracy (%)")
# Title spelling corrected ("Performace" -> "Performance").
plt.title("Performance of geographically weighted models")
sns.despine()

In [None]:
# Final figure: macro-averaged F1 per cluster. Colour encodes the model type,
# marker style encodes the dimensionality reduction.
ax = sns.scatterplot(
    data=performance, x="cluster", y="f1_macro", hue="model", style="reduction"
)
# Keep the legend outside the axes so it does not cover the points.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), frameon=False)
plt.xlabel("Cluster label")
# The y axis shows f1_macro; the label previously read "Balanced Accuracy (%)",
# copied over from the balanced-accuracy figure.
plt.ylabel("F1 score (macro)")
# Title spelling corrected ("Performace" -> "Performance").
plt.title("Performance of geographically weighted models")
sns.despine()

In [None]:
# Human-readable names of the cluster labels, keyed by the label written as a
# string ("1" .. "8"). The names are listed in label order and the keys are
# derived from their position.
cluster_names = {
    str(label): name
    for label, name in enumerate(
        [
            "Central Urban Developments",
            "Large Scale Outliers",
            "Dense Urban Developments",
            "Street-aligned Developments",
            "Sparse Rural Development",
            "Linear Road Network Developments",
            "Sparse Road Network Developments",
            "Large Scale Developments",
        ],
        start=1,
    )
}