# Bandwidth search on Principal Component Analysis

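Find the optimal bandwidth for each class from the saved adaptive-bandwidth AIC scores, using the PCA reduction as the input. The score files for the `fa` and `umap` reductions are inspected the same way further down.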

In [None]:
import pathlib
from glob import glob

import geopandas as gpd
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from gwlearn.ensemble import GWRandomForestClassifier
from gwlearn.linear_model import GWLogisticRegression
from gwlearn.search import BandwidthSearch

In [None]:
# Collect the adaptive-bandwidth score files for the PCA input: CSVs in the
# working directory whose names contain "adaptive", "new" and "pca".
# glob() yields matches in file-system order, so the result is sorted to keep
# the column, legend and print order reproducible across machines.
a_files = sorted(
    f for f in glob("*adaptive*.csv") if "new" in f and "pca" in f
)

In [None]:
# Echo the selected file names as the cell output for a quick visual check
a_files

In [None]:
# Bandwidth recorded for each key when the PCA input is used.
# NOTE(review): keys are presumably the class labels and values the bandwidths
# picked from the score minima -- confirm against the printed results.
pca_bw = {
    1: 1500,
    3: 1500,
    4: 1300,
    5: 1500,
    6: 300,
    7: 1500,
    8: 1700,
}

In [None]:
# Read every score file into its own frame. The "aic" column is renamed to the
# file name so that each file ends up as a separately labelled column once the
# frames are joined side by side.
a_files_list = [
    pd.read_csv(path, index_col=0).rename(columns={"aic": path})
    for path in a_files
]
a_data = pd.concat(a_files_list, axis=1)

# Draw the score curves, then report for each column the index label at which
# it is smallest together with that smallest value.
# NOTE(review): the index presumably holds the candidate bandwidths -- confirm
# against the CSV layout.
a_data.plot()
for name in a_data.columns:
    print(name)
    scores = a_data.loc[:, name]
    print(scores.idxmin(), np.min(scores))

In [None]:
# Collect the adaptive-bandwidth score files for the "fa" input: CSVs in the
# working directory whose names contain "adaptive", "new" and "fa".
# NOTE(review): this comment used to say "fixed bandwidth", but the pattern
# selects *adaptive* files -- confirm that adaptive scores are what is wanted.
# glob() yields matches in file-system order, so the result is sorted to keep
# the column, legend and print order reproducible across machines.
f_files = sorted(
    f for f in glob("*adaptive*.csv") if "new" in f and "fa" in f
)

In [None]:
# Bandwidth recorded for each key when the "fa" input is used.
# NOTE(review): keys are presumably the class labels and values the bandwidths
# picked from the score minima -- confirm against the printed results.
fa_bw = {
    1: 1300,
    3: 1500,
    4: 1700,
    5: 500,
    6: 300,
    7: 1300,
    8: 900,
}

In [None]:
# Read every score file into its own frame. The "aic" column is renamed to the
# file name so that each file ends up as a separately labelled column once the
# frames are joined side by side.
f_files_list = [
    pd.read_csv(path, index_col=0).rename(columns={"aic": path})
    for path in f_files
]
f_data = pd.concat(f_files_list, axis=1)

# Draw the score curves, then report for each column the index label at which
# it is smallest together with that smallest value.
# NOTE(review): the index presumably holds the candidate bandwidths -- confirm
# against the CSV layout.
f_data.plot()
for name in f_data.columns:
    print(name)
    scores = f_data.loc[:, name]
    print(scores.idxmin(), np.min(scores))

In [None]:
# Collect the adaptive-bandwidth score files for the "umap" input: CSVs in the
# working directory whose names contain "adaptive", "new" and "umap".
# NOTE(review): this comment used to say "fixed bandwidth", but the pattern
# selects *adaptive* files -- confirm that adaptive scores are what is wanted.
# glob() yields matches in file-system order, so the result is sorted to keep
# the column, legend and print order reproducible across machines.
u_files = sorted(
    f for f in glob("*adaptive*.csv") if "new" in f and "umap" in f
)

In [None]:
# Bandwidth recorded for each key when the "umap" input is used.
# NOTE(review): keys are presumably the class labels and values the bandwidths
# picked from the score minima -- confirm against the printed results.
umap_bw = {
    1: 1300,
    3: 1500,
    4: 1300,
    5: 700,
    6: 300,
    7: 1300,
    8: 700,
}

In [None]:
# Read every score file into its own frame. The "aic" column is renamed to the
# file name so that each file ends up as a separately labelled column once the
# frames are joined side by side.
u_files_list = [
    pd.read_csv(path, index_col=0).rename(columns={"aic": path})
    for path in u_files
]
u_data = pd.concat(u_files_list, axis=1)

# Draw the score curves on a 10x6 figure and anchor the legend outside the
# axes (to the right of the upper-right corner) so it does not cover the lines.
fig, ax = plt.subplots(figsize=(10, 6))
u_data.plot(ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)

# Report for each column the index label at which it is smallest together with
# that smallest value.
# NOTE(review): the index presumably holds the candidate bandwidths -- confirm
# against the CSV layout.
for name in u_data.columns:
    print(name)
    scores = u_data.loc[:, name]
    print(scores.idxmin(), np.min(scores))