# % German

In [None]:
import folium
import geopandas as gpd
import pandas as pd
from libpysal import graph

In [None]:
cd /home/lisa/work/people_places_germany/code

In [None]:
from clusters import assign_clusters
from spatial_autocorrelation import lisa

In [None]:
# Load the cluster GeoDataFrame from its parquet file.
CLUSTERS_PATH = "/data/cluster_data/clusters_umap_freiburg_100_3_gaussian_euclidean_complete_chebyshev_3.pq"
clusters = gpd.read_parquet(CLUSTERS_PATH)

In [None]:
# Interactive preview of the clusters, coloured by each row's index value
# with the "tab20" colormap.
clusters.explore(column=clusters.index, cmap="tab20")

# Raw Data

In [None]:
# Read the processed demographics table, then move its index into regular
# columns.
DEMOGRAPHICS_PATH = "/data/processed_data/Bevoelkerung100M.parquet"
demographics = pd.read_parquet(DEMOGRAPHICS_PATH)
demographics = demographics.reset_index()

In [None]:
# Combine the demographic rows with the clusters via the project's
# `assign_clusters` helper. Of the three results, `gdf` and `data` are used
# below; `largest_overlap` is not referenced again in this notebook.
gdf, largest_overlap, data = assign_clusters(demographics, clusters)

In [None]:
# Share of the German-nationality count in the total count, per row.
# Missing German counts are treated as 0 before dividing.
german_count = data["NATIONALITY", "Germany"].fillna(0)
total_count = data[" INSGESAMT", "Total"]  # NOTE: the key starts with a space
german_share = german_count / total_count

# Store the same share on both frames under ("STATS", "% German").
gdf["STATS", "% German"] = german_share
data["STATS", "% German"] = german_share

In [None]:
# Keep the ID, geometry, cluster and STATS column groups, then flatten the
# multi-level column labels down to their first level.
kept_groups = ["ID", "geometry", "cluster", "STATS"]
gdf1 = gdf[kept_groups]
gdf1.columns = gdf1.columns.get_level_values(0)

In [None]:
# Nationality breakdown alongside the ID; here the second column level is
# kept as the flat label (e.g. "Germany", "Abroad" are used further down).
nationality_groups = ["ID", "NATIONALITY"]
gdf2 = gdf[nationality_groups]
gdf2.columns = gdf2.columns.get_level_values(1)

In [None]:
# Left-join the nationality columns onto gdf1 by "ID", so every gdf1 row is
# kept even when gdf2 has no matching ID.
gdf1 = gdf1.merge(gdf2, how="left", on="ID")

In [None]:
# Optional quick look at the raw "STATS" share on a map (left disabled):
# gdf1.explore("STATS", cmap="coolwarm", prefer_canvas=True)

# Compute Spatial Lag

In [None]:
# Restrict the analysis to a rectangular window, given in the coordinate
# units of gdf1's CRS (20000 units on each side).
x_min, x_max = 4150000, 4170000
y_min, y_max = 2760000, 2780000
gdf1 = gdf1.cx[x_min:x_max, y_min:y_max]

In [None]:
# Drop rows without a cluster value before building the graph.
has_cluster = gdf1["cluster"].notna()
gdf1 = gdf1[has_cluster]

# Contiguity graph with rook=False (queen criterion), then the "R"
# (row-standardised) transformation of its weights.
queen = graph.Graph.build_contiguity(gdf1, rook=False)
row_wise_queen = queen.transform("R")

In [None]:
# Inspect the flattened column labels; the spatial-lag step expects "STATS",
# "Germany" and "Abroad" to be among them.
gdf1.columns

In [None]:
# Spatial lag of each variable under the row-standardised queen graph,
# stored next to the source column as "<name>_lag".
# `assign` returns a new frame, so the columns are not written into the
# filtered slice that gdf1 currently is (this avoids SettingWithCopyWarning).
lag_columns = {
    f"{column}_lag": row_wise_queen.lag(gdf1[column])
    for column in ("STATS", "Germany", "Abroad")
}
gdf1 = gdf1.assign(**lag_columns)

In [None]:
# Display the frame to check the newly added "*_lag" columns.
gdf1

In [None]:
# Write the windowed frame (with its lag columns) to the temp_data folder.
LAG_OUTPUT_PATH = "/home/lisa/work/people_places_germany/Notebooks/temp_data/%german.parquet"
gdf1.to_parquet(LAG_OUTPUT_PATH)

# Local Spatial Autocorrelation

In [None]:
# Run the project's `lisa` helper on the "STATS" column. The 0.05 argument is
# presumably the significance level -- confirm in spatial_autocorrelation.lisa.
# `mi` exposes `.I` and `.p_sim`; `gdf_05` is a frame whose "sig_cluster"
# column is mapped in the final cell.
mi, gdf_05 = lisa(gdf1, "STATS", 0.05)

In [None]:
# Report the Moran's I statistic and its `p_sim` p-value from the `lisa` result.
print(f"Moran's I: {mi.I}, p-value: {mi.p_sim}")

In [None]:
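# Optional standalone LISA map (left disabled). NOTE: this colours by "cluster", whereas the combined map at the end colours by "sig_cluster".
# gdf_05.explore("cluster", prefer_canvas=True, cmap=["#d7191c","#fdae61","#abd9e9","#2c7bb6","lightgrey"])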

# Cluster Aggregation

In [None]:
# Per-cluster mean and non-null count of the "% German" share, computed in a
# single grouped pass instead of re-filtering `data` twice for every label.
cluster_ids = data["cluster", "cluster"]
n_labels = int(cluster_ids.max()) + 1

stats = (
    data["STATS", "% German"]
    .groupby(cluster_ids)
    .agg(["mean", "count"])
    # One row per label 0..max, including labels that have no rows at all;
    # labels outside that range are dropped.
    .reindex(range(n_labels))
    .rename(columns={"mean": "% German", "count": "% German count"})
    # Plain 0..max positional index, so the index value equals the label.
    .reset_index(drop=True)
)

# Labels without any rows come out of the reindex as NaN: their mean stays
# NaN, while their count is 0.
stats["% German count"] = stats["% German count"].fillna(0).astype("int64")

In [None]:
# Expose the index as a "label" column and join the per-label statistics onto
# the cluster frame (default inner join on "label").
labelled_stats = stats.assign(label=stats.index)
stats = clusters.merge(labelled_stats, on="label")

In [None]:
# Base layer: clusters coloured by their mean "% German" share.
m = stats.explore(
    column="% German",
    cmap="coolwarm",
    prefer_canvas=True,
    name="cluster aggregation",
)

# Second layer: the LISA output coloured by its "sig_cluster" column, using a
# fixed five-colour palette.
lisa_palette = ["#d7191c", "#fdae61", "#abd9e9", "#2c7bb6", "lightgrey"]
gdf_05.explore(
    "sig_cluster",
    cmap=lisa_palette,
    prefer_canvas=True,
    name="local spatial autocorrelation",
    m=m,
)

# Third layer: the raw "STATS" share, with the colour scale starting at 0.85.
gdf1.explore(
    "STATS",
    cmap="coolwarm",
    vmin=0.85,
    prefer_canvas=True,
    name="raw data",
    m=m,
)

# Layer control so each of the three layers can be toggled on and off.
layer_control = folium.LayerControl()
layer_control.add_to(m)

# Last expression of the cell: renders the map.
m