# Generate morphotope-level attributes

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
from libpysal.graph import read_parquet, Graph
from shapely import unary_union
import momepy as mm
from sklearn.preprocessing import StandardScaler

from shapely import get_coordinates
from scipy.cluster.hierarchy import single
from scipy.spatial.distance import pdist

In [4]:
# Suffix selecting which morphotope label files are read; add_morph_chars
# forwards it to morphotopes_to_etcs.
model_params = "_post_processing_v1"
# Absolute data directories. Of these, only graph_dir is referenced by the
# functions visible in this notebook (add_morph_chars reads the building
# graph from it).
buildings_dir = "/data/uscuni-ulce/processed_data/buildings/"
graph_dir = "/data/uscuni-ulce/processed_data/neigh_graphs/"
morph_dir = "/data/uscuni-ulce/processed_data/morphotopes/"

In [None]:
# Root data directory; the region hull file lives in its "regions/" subfolder.
regions_datadir = "/data/uscuni-ulce/"
# The index labels of region_hulls are used as the region ids that the
# parallel run passes to add_morph_chars.
region_hulls = gpd.read_parquet(
    regions_datadir + "regions/" + "cadastre_regions_hull.parquet"
)

## Parallel processing

In [6]:
def morphotopes_to_etcs(region_id, etcs=True, model_params="_100_0_None_None_False"):
    """Load a region's cells and attach morphotope labels to them.

    Parameters
    ----------
    region_id
        Region identifier interpolated into the input file names.
    etcs : bool, default True
        If truthy, the region's tessellation file is read; otherwise the
        region's buildings file is read.
    model_params : str
        Suffix of the morphotope label file to read.

    Returns
    -------
    The frame returned by ``gpd.read_parquet`` with two added columns:
    ``label`` (an integer code per distinct morphotope value, ``-1`` for rows
    that do not appear in the label file) and ``morph``
    (``"<region_id>_<morphotope value>"``, or ``"<region_id>_-1"`` for rows
    that do not appear in the label file).
    """
    if etcs:
        source_path = f"/data/uscuni-ulce/processed_data/tessellations/tessellation_{region_id}.parquet"
    else:
        source_path = f"/data/uscuni-ulce/processed_data/buildings/buildings_{region_id}.parquet"

    cells = gpd.read_parquet(source_path)
    # Default for every row; overwritten below for rows present in the label file.
    cells["label"] = -1

    labels = pd.read_parquet(
        f"/data/uscuni-ulce/processed_data/morphotopes/tessellation_labels_morphotopes_{region_id}{model_params}.pq"
    )
    # Store the first column under the name "morphotope_label" (this overwrites
    # the column if it already exists).
    # NOTE(review): the code below uses `labels.values` as a whole, so it
    # presumably expects the file to hold a single column - confirm.
    labels.loc[:, "morphotope_label"] = labels.values[:, 0]

    # Map every distinct morphotope value to a consecutive integer code.
    distinct_values = np.unique(labels.values)
    code_of = pd.Series(np.arange(distinct_values.shape[0]), distinct_values)
    cells.loc[labels.index, "label"] = labels.map(lambda x: code_of.loc[x]).values

    # Region-qualified morphotope identifier, with "-1" as the default suffix.
    region_prefix = str(region_id) + "_"
    cells["morph"] = region_prefix + "-1"
    cells.loc[labels.index, "morph"] = region_prefix + labels.values
    return cells


def generate_ratio(group, buildings_q1):
    """Summarise the buildings of one morphotope into shape and spacing characters.

    Buildings of ``group`` that fall in the same connected component of
    ``buildings_q1`` are first merged into one geometry each; all measures are
    computed on these merged ("connected") buildings unless stated otherwise.

    Parameters
    ----------
    group : geopandas.GeoDataFrame
        Buildings of a single morphotope. Needs a geometry column and a
        ``libNCo`` column.
    buildings_q1 : libpysal.graph.Graph
        Graph over buildings; its subgraph on ``group.index`` defines the
        connected components that are merged.

    Returns
    -------
    pandas.Series
        ``limMFR``
            Area of the connected buildings with facade ratio < 8 and
            elongation < 0.90, divided by the area of all connected buildings.
        ``limMTC``
            Same ratio computed on counts instead of areas.
        ``limLMFR``
            Same area ratio restricted to connected buildings whose area is
            above the median area.
        ``limLPS``
            1 if ``limMFR`` > 0.4 or the median ``libNCo`` of the group is
            > 0, else 0.
        ``limAre``, ``limPer``, ``limLAL``
            Sum of the (at most) ten largest areas, perimeters and longest
            axis lengths of the connected buildings.
        ``limSDi``
            Standard deviation of the edge weights of a relative-neighbourhood
            graph built on representative points, divided by the median
            perimeter of the connected buildings (0.35 for very small groups).
    """
    # Merge buildings that belong to the same connected component of the graph.
    group_graph = buildings_q1.subgraph(group.index)
    connected_buildings = group.geometry.groupby(group_graph.component_labels).apply(
        lambda geoms: unary_union(geoms.values)
    )
    n_connected = connected_buildings.shape[0]

    areas = connected_buildings.area
    perimeters = connected_buildings.length
    elongation = mm.elongation(connected_buildings)
    facade_ratio = mm.facade_ratio(connected_buildings)
    # Components flagged as "thin" by the two shape thresholds.
    is_thin = (facade_ratio < 8) & (elongation < 0.90)

    morph_fr_area_ratio = areas[is_thin].sum() / areas.sum()
    morph_fr_count_ratio = (
        connected_buildings[is_thin].count() / connected_buildings.count()
    )

    # NOTE(review): when no component is strictly above the median area (e.g.
    # a single connected building) the denominator is 0 and this is NaN.
    is_large = areas > areas.median()
    largest_morph_fr_area_ratio = (
        areas[is_thin & is_large].sum() / areas[is_large].sum()
    )

    limLPS = ((morph_fr_area_ratio > 0.4) | (group.libNCo.median() > 0)).astype(int)

    # Spacing between buildings in the morphotope.
    if n_connected <= 3 and group.shape[0] < 3:
        # 99886 has an isolated building that gets treated as a morphotope;
        # such tiny groups get a fixed value instead of a triangulation.
        val = 0.35
    else:
        # Use the merged buildings when there are more than three of them,
        # otherwise fall back to the individual buildings of the group.
        point_source = connected_buildings if n_connected > 3 else group
        tri = Graph.build_triangulation(
            point_source.representative_point(),
            method="relative_neighborhood",
            kernel="identity",
        )
        val = tri.adjacency.describe()["std"] / perimeters.median()

    # Number of largest connected buildings that enter the limAre/limPer/limLAL sums.
    limit_value = 10
    top_n = min(limit_value, n_connected)

    def _sum_of_largest(values):
        # Positional slice of the ascending sort keeps the `top_n` largest values.
        return values.sort_values(ascending=True).iloc[-top_n:].sum()

    return pd.Series(
        {
            "limMFR": morph_fr_area_ratio,
            "limMTC": morph_fr_count_ratio,
            "limLMFR": largest_morph_fr_area_ratio,
            "limLPS": limLPS,
            "limAre": _sum_of_largest(areas),
            "limPer": _sum_of_largest(perimeters),
            "limLAL": _sum_of_largest(mm.longest_axis_length(connected_buildings)),
            "limSDi": val,
        }
    )

In [7]:
def add_morph_chars(region_id):
    """Compute morphotope-level building characters for one region and save them.

    Reads the region's labelled tessellation (via ``morphotopes_to_etcs`` with
    the notebook-level ``model_params``), the region's building characters and
    the building graph from ``graph_dir``; applies ``generate_ratio`` to the
    buildings of every morphotope and writes the result to
    ``morph_chars_{region_id}.pq``.

    Parameters
    ----------
    region_id
        Region identifier interpolated into the input and output file names.

    Returns
    -------
    None
        The result is only written to disk.
    """
    etcs = morphotopes_to_etcs(region_id, model_params=model_params)
    buildings = gpd.read_parquet(
        f"/data/uscuni-ulce/processed_data/chars/buildings_chars_{region_id}.parquet"
    )

    # Keep only cells assigned to a morphotope (suffix other than "-1"). Only
    # their index is needed to select buildings, so the cells themselves are
    # neither copied nor reprojected.
    has_morphotope = etcs.morph.str.split("_").str[-1] != "-1"
    morph_index = etcs.index[has_morphotope.to_numpy()]

    buildings = buildings.loc[morph_index]
    # Index-aligned assignment of the morphotope id to every selected building.
    buildings["morph"] = etcs["morph"]
    buildings_q1 = read_parquet(graph_dir + f"building_graph_{region_id}.parquet")
    res = buildings.groupby("morph").apply(generate_ratio, buildings_q1)

    res.to_parquet(
        f"/data/uscuni-ulce/processed_data/morphotopes/morph_chars_{region_id}.pq"
    )

In [9]:
%%time
from joblib import Parallel, delayed

n_jobs = -1
new = Parallel(n_jobs=n_jobs)(
    delayed(add_morph_chars)(region_id) for region_id, _ in region_hulls.iterrows()
)



CPU times: user 1.88 s, sys: 1.05 s, total: 2.93 s
Wall time: 14min 4s
