# % Rented

In [None]:
import folium
import geopandas as gpd
import pandas as pd
from libpysal import graph

In [None]:
cd /home/lisa/work/people_places_germany/code

In [None]:
from clusters import assign_clusters
from spatial_autocorrelation import lisa

In [None]:
# Load the cluster GeoDataFrame from its Parquet file.
cluster_file = "/data/cluster_data/clusters_umap_freiburg_100_3_gaussian_euclidean_complete_chebyshev_3.pq"
clusters = gpd.read_parquet(cluster_file)

In [None]:
# clusters.explore(column=clusters.index,cmap='tab20')

# Raw Data

In [None]:
# Read the Wohnungen100m table, then move its index back into ordinary columns.
dwellings_file = "/data/processed_data/Wohnungen100m.parquet"
demographics = pd.read_parquet(dwellings_file)
demographics = demographics.reset_index()

In [None]:
# assign_clusters is a project helper; its result is unpacked into three names
# in the order it returns them.
assignment = assign_clusters(demographics, clusters)
gdf, largest_overlap, data = assignment

In [None]:
# Share of rented dwellings per row: the two "Rented" categories (missing
# counts treated as zero) divided by the row-wise total over all USE_TYPE
# columns.
rented_with_household = data[
    ("USE_TYPE", "Rented: with currently managed household")
].fillna(0)
rented_without_household = data[
    ("USE_TYPE", "Rented: without currently managed household")
].fillna(0)
use_type_total = data["USE_TYPE"].sum(axis=1)

gdf[("STATS", "Rented for residential purposes")] = (
    rented_with_household + rented_without_household
) / use_type_total

In [None]:
# Select the identifier, geometry, computed statistic and cluster groups, then
# flatten the column labels to their first (outer) level.
outer_groups = ["ID", "geometry", "STATS", "cluster"]
gdf1 = gdf[outer_groups]
gdf1.columns = gdf1.columns.get_level_values(level=0)

In [None]:
# Take the ID and the USE_TYPE columns, label them by their second (inner)
# level, and left-join them onto gdf1 by ID.
use_type_groups = ["ID", "USE_TYPE"]
gdf2 = gdf[use_type_groups]
gdf2.columns = gdf2.columns.get_level_values(level=1)
gdf1 = gdf1.merge(right=gdf2, on="ID", how="left")

# Compute Spatial Lag

In [None]:
# Restrict gdf1 to a fixed coordinate window using the .cx indexer
# (first slice: x range, second slice: y range).
x_window = slice(4150000, 4170000)
y_window = slice(2760000, 2780000)
gdf1 = gdf1.cx[x_window, y_window]

In [None]:
# Keep only the rows that have a cluster value. The explicit copy makes gdf1
# an independent frame, so the "_lag" columns assigned to it later are written
# to the frame itself rather than to a filtered slice (this avoids pandas'
# SettingWithCopyWarning).
gdf1 = gdf1[gdf1.cluster.notna()].copy()

# Contiguity graph with rook=False (queen contiguity), followed by the "R"
# transformation of its weights.
queen = graph.Graph.build_contiguity(gdf1, rook=False)
row_wise_queen = queen.transform("R")

In [None]:
# Show the column labels of gdf1 (kept as the cell's last expression so the
# notebook displays them).
gdf1.columns

In [None]:
# For every listed column, compute its lag with the transformed contiguity
# graph and store it next to the source under "<column>_lag".
lag_sources = (
    "STATS",
    "Rented: with currently managed household",
    "Ownership: with currently managed household",
    "Holiday and leisure apartment",
    "Vacant",
    "Ownership: without currently managed household",
    "Rented: without currently managed household",
)
for source_col in lag_sources:
    lagged_values = row_wise_queen.lag(gdf1[source_col])
    gdf1.loc[:, source_col + "_lag"] = lagged_values

In [None]:
# Write gdf1 (including the "_lag" columns) to a Parquet file.
output_file = "/home/lisa/work/people_places_germany/Notebooks/temp_data/%rented.parquet"
gdf1.to_parquet(output_file)

In [None]:
# gdf1.cx[4150000:4170000,2760000:2780000].explore("STATS", cmap="coolwarm", prefer_canvas=True)

# Local Spatial Autocorrelation

In [None]:
# Run the project's lisa helper on the coordinate window for the "STATS"
# column; 0.05 is passed as its third argument (presumably the significance
# level -- confirm against spatial_autocorrelation.lisa).
lisa_window = gdf1.cx[4150000:4170000, 2760000:2780000]
mi, gdf_05 = lisa(lisa_window, "STATS", 0.05)

In [None]:
# Report the I statistic and the simulated p-value stored on the result object.
moran_i = mi.I
p_value = mi.p_sim
print(f"Moran's I: {moran_i}, p-value: {p_value}")

In [None]:
# gdf_05.explore("cluster", prefer_canvas=True, cmap=["#d7191c","#fdae61","#abd9e9","#2c7bb6","lightgrey"])

# Cluster Aggregation

In [None]:
# Per-cluster mean and non-null count of STATS for every integer label from 0
# up to the largest cluster value in gdf1.
#
# A single groupby pass replaces the per-label boolean masking, which scanned
# the whole frame twice for every label. Reindexing onto 0..max keeps one row
# per label, so labels without any rows still appear (mean NaN, count 0), and
# the resulting positional index equals the cluster label.
n_clusters = int(gdf1["cluster"].max()) + 1

stats = (
    gdf1.groupby("cluster")["STATS"]
    .agg(["mean", "count"])
    .reindex(range(n_clusters))
    .rename(columns={"mean": "stats", "count": "stats count"})
    .reset_index(drop=True)
)
# Labels introduced by the reindex have no count yet; they contain zero rows.
stats["stats count"] = stats["stats count"].fillna(0).astype("int64")

In [None]:
# Expose the positional index as a "label" column and join the aggregated
# values onto the cluster frame through that column.
labelled_stats = stats.assign(label=stats.index)
stats = clusters.merge(labelled_stats, on="label")

In [None]:
# stats.explore(column='stats', cmap="coolwarm", prefer_canvas=True)

In [None]:
# Base layer: the per-cluster "stats" value.
m = stats.explore(
    column="stats",
    name="cluster aggregation",
    cmap="coolwarm",
    prefer_canvas=True,
)

# Overlay: the "sig_cluster" column of the lisa result, drawn with a fixed
# five-colour palette on the same map.
lisa_palette = ["#d7191c", "#fdae61", "#abd9e9", "#2c7bb6", "lightgrey"]
gdf_05.explore(
    column="sig_cluster",
    m=m,
    name="local spatial autocorrelation",
    cmap=lisa_palette,
    prefer_canvas=True,
)

# Overlay: the per-row "STATS" values.
gdf1.explore(
    column="STATS",
    m=m,
    name="raw data",
    cmap="coolwarm",
    prefer_canvas=True,
)

# Add a layer control so the layers can be toggled on and off.
layer_toggle = folium.LayerControl()
layer_toggle.add_to(m)

# Keep the map as the final expression so the notebook renders it.
m