In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the merged input table from Parquet with geopandas.
merged_fa_path = "/data/uscuni-restricted/05_fa/merged_fa.parquet"
data = gpd.read_parquet(merged_fa_path)

In [None]:
# Keep only the rows that have no missing value in any column.
data = data.dropna(axis="index")

In [None]:
# Feature matrix: every column except the geometry.
X = data.drop(columns="geometry")

# Fit a PCA that keeps all components to obtain the full eigenvalue spectrum.
# NOTE(review): the inputs are not standardized in this cell, so these are
# eigenvalues of the covariance matrix. The Kaiser cut-off of 1 used below is
# normally applied to correlation-matrix eigenvalues — confirm the columns are
# already on a unit-variance scale.
pca = PCA().fit(X)
eigenvalues = pca.explained_variance_

# Scree plot
fig, ax = plt.subplots(figsize=(15, 15))
ax.plot(range(1, len(eigenvalues) + 1), eigenvalues, marker="o")
ax.axhline(1, color="r", linestyle="--", label="Eigenvalue = 1")
ax.set_title("Scree Plot for components of components")
ax.set_xlabel("Component Number")
ax.set_ylabel("Eigenvalue")
ax.legend()
ax.grid()
plt.show()

# Print suggested number of components (Kaiser rule): count of eigenvalues above 1.
suggested = int(np.count_nonzero(eigenvalues > 1))
print(f"Suggested number of components (eigenvalue > 1): {suggested}")

In [None]:
# Fit the factor model on every non-geometry column.
# NOTE(review): 17 factors is hard-coded — presumably taken from the scree plot /
# Kaiser count computed earlier; confirm, and consider a named constant.
fa_input = data.drop(columns="geometry")
fa = FactorAnalysis(n_components=17, max_iter=10000)
fa.fit(fa_input)

# Loadings with input variables as rows and factors as columns.
loadings = fa.components_.T

# Convert to DataFrame for readability: one row per factor, one column per
# input variable. `components_` already has that orientation, so it is used
# directly instead of transposing `loadings` back.
loadings_df = pd.DataFrame(
    fa.components_,
    columns=fa_input.columns,
)
loadings_df

In [None]:
# Project every row onto the fitted factors.
transformed = fa.transform(data.drop(columns="geometry"))

# Build the output frame on the original row index and re-attach the geometry.
# Dropping a column does not change the index, so `data.index` is used directly
# rather than copying the whole frame again just to read its index.
df_fa = pd.DataFrame(
    transformed,
    index=data.index,
).set_geometry(data.geometry)

# The factor columns are named by integer position; convert every column name
# to a string before writing (Parquet writers expect string column names).
df_fa.columns = df_fa.columns.astype(str)
# Save to Parquet
df_fa.to_parquet("/data/uscuni-restricted/05_fa/fa_of_fa_geometries.parquet")

In [None]:
# Display the factor-score frame that was written to Parquet in the previous cell.
df_fa