In [None]:
import geopandas as gpd
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from libpysal.graph import read_parquet
import numpy as np

In [None]:
# Version tags interpolated into the cluster file names read further down.
v, v_ext = "v10", "v10_ext1"

# Post-processing parameter tag (not referenced by the cells visible here).
model_params = "_post_processing_v1"

# Data locations; every directory string ends with a trailing slash because
# file names are appended to it directly.
regions_datadir = "/data/uscuni-ulce/"
graph_dir = "/data/uscuni-ulce/processed_data/neigh_graphs/"
chars_dir = "/data/uscuni-ulce/processed_data/chars/"
clusters_dir = "/data/uscuni-ulce/processed_data/clusters/"

In [None]:
# Country-group tag as it appears in the hull file name.
country = "fr_sp_nl_be"

# Region hull geometries for that country group.
region_hulls = gpd.read_parquet(f"{regions_datadir}regions/{country}_regions_hull.parquet")

In [None]:
# Reference cluster table for version `v`. The path is built from the config
# cell's `clusters_dir` so that changing the data location there also applies here.
cdata = pd.read_parquet(f"{clusters_dir}cdata_{v}.pq")
# String labels: `assign_morphs_to_tree` uses this index as the class labels.
cdata.index = cdata.index.astype(str)

In [None]:
region_id = 370160 # region processed by the cells below; "bcn" is presumably Barcelona — TODO confirm

In [None]:
def assign_morphs_to_tree(
    region_id,
    cdata,
    morphotopes_dir="/data/uscuni-ulce/processed_data/morphotopes/",
):
    """Relabel a region's non-noise buildings with their nearest ``cdata`` row.

    Buildings of the region are grouped by their ``morph`` label and each
    group is summarised by the median of its characters, plus ``limAre`` and
    ``limLPS`` taken from the region's morphotope table. These summaries and
    ``cdata`` are standardised together, and every morphotope receives the
    index label of its single nearest ``cdata`` row (1-nearest-neighbour).
    That label then replaces ``final`` for all buildings of the morphotope.

    Reads the module-level ``clusters_dir``, ``chars_dir`` and ``v_ext``.

    Parameters
    ----------
    region_id : int or str
        Identifier interpolated into the per-region file names.
    cdata : pandas.DataFrame
        Reference rows to match against. Its columns select (and order) the
        morphotope characters that are compared; its index labels are the
        classes that get assigned and must be convertible to ``int``.
    morphotopes_dir : str, optional
        Directory (with trailing slash) holding ``morph_chars_{region_id}.pq``.

    Returns
    -------
    pandas.Series
        The ``final`` column, indexed like the region's cluster table.
        Rows whose original ``final`` was ``-1`` (noise) are left untouched.

    Raises
    ------
    AssertionError
        If the cluster table and the filtered character table do not carry
        the same index labels.
    """
    # read buildings cluster and characters
    clusters = pd.read_parquet(
        f"{clusters_dir}clusters_{region_id}_{v_ext}.pq", columns=["final", "morph"]
    )
    chars = pd.read_parquet(f"{chars_dir}primary_chars_{region_id}.parquet")
    chars = chars[chars.index >= 0]
    assert (clusters.index == chars.index).all(), (
        f"clusters and characters of region {region_id} are not aligned"
    )

    # get the index of the noise
    is_noise = clusters["final"].values == -1

    # Nothing to relabel: with zero morphotopes the classifier below would be
    # asked to predict on an empty array, which scikit-learn rejects.
    if is_noise.all():
        return clusters["final"].copy()

    # align morphotopes and cdata
    morphotope_groups = (
        chars[~is_noise].groupby(clusters.loc[~is_noise, "morph"]).median()
    )
    morphotope_chars = pd.read_parquet(
        f"{morphotopes_dir}morph_chars_{region_id}.pq"
    )
    # assignment aligns on the morphotope label
    morphotope_groups["limAre"] = morphotope_chars["limAre"]
    morphotope_groups["limLPS"] = morphotope_chars["limLPS"]
    morphotope_groups = morphotope_groups[cdata.columns]

    # combine the cluster and the region morphotope chars
    # and scale them
    all_data = pd.concat((cdata, morphotope_groups))
    scaler = StandardScaler()
    vals = scaler.fit_transform(all_data)
    # missing values survive scaling; 0 places them at the (scaled) column mean
    all_data = pd.DataFrame(
        vals, index=all_data.index, columns=all_data.columns
    ).fillna(0)

    # setup tree data
    # Split by position rather than by label: a label lookup would return
    # extra rows if a morphotope label ever coincided with a cdata label.
    n_reference = len(cdata)
    tree_data = all_data.iloc[:n_reference]
    morphotope_query_data = all_data.iloc[n_reference:]

    ## query a knn classifier and assign predictions
    tree = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
    tree = tree.fit(X=tree_data.values, y=cdata.index.values.astype(str))
    morphotope_predictions = tree.predict(morphotope_query_data.values)

    # reassign morphotope labels
    morph_to_cluster = dict(
        zip(morphotope_groups.index, morphotope_predictions.astype(int))
    )
    clusters.loc[~is_noise, "final"] = clusters.loc[~is_noise, "morph"].map(
        morph_to_cluster
    )

    return clusters["final"].copy()

In [None]:
# Per-building labels for the selected region after nearest-cluster reassignment.
new_etc_clusters = assign_morphs_to_tree(region_id=region_id, cdata=cdata)

In [None]:
# Both files live in the configured clusters directory; the paths are built
# from `clusters_dir` so that changing the data location in the config cell
# also applies here.

# Lookup table for version `v`; its column `3` is used by the cells below.
cluster_mapping = pd.read_parquet(f"{clusters_dir}cluster_mapping_{v}.pq")

# Geometries of the region's buildings, used as the map layer below.
etcs = gpd.read_parquet(f"{clusters_dir}clusters_{region_id}_{v}.pq")


In [None]:
# Display names keyed by the ids found in column `3` of `cluster_mapping`.
cnames = {
    1: 'Incoherent Large-Scale Homogeneous Fabric',
    2: 'Incoherent Large-Scale Heterogeneous Fabric',
    3: 'Incoherent Small-Scale Linear Fabric',
    4: 'Incoherent Small-Scale Sparse Fabric',
    5: 'Incoherent Small-Scale Compact Fabric',
    6: 'Coherent Interconnected Fabric',
    7: 'Coherent Dense Disjoint Fabric',
    8: 'Coherent Dense Adjacent Fabric',
}

# Two-step lookup: reassigned label -> column `3` id -> display name.
# Labels missing from either lookup end up as NaN.
new_etc_names = new_etc_clusters.map(cluster_mapping[3].to_dict()).map(cnames)

# Attach the names to the geometries (aligned on the index).
etcs['l3_names'] = new_etc_names

In [None]:
import lonboard
from sidecar import Sidecar

# Polygon layer of the region's geometries on a dark basemap.
layer = lonboard.SolidPolygonLayer.from_geopandas(etcs, opacity=0.7)
m = lonboard.Map(layer, basemap_style=lonboard.basemap.CartoBasemap.DarkMatter)

# Render the map in a side panel instead of the cell output.
sc = Sidecar(title="Final Clusters")
with sc:
    display(m)

In [None]:
from core.cluster_validation import get_color

# Colour each polygon by its column-`3` id; labels without a mapping
# (NaN after the lookup) are coloured as -1.
layer.get_fill_color = get_color(
    new_etc_clusters
    .map(cluster_mapping[3].to_dict())
    .fillna(-1)
    .astype(int)
)