# Generate morphotope geometry with cluster information

Dissolves enclosed tessellation cells into morphotope geometries for visualisation purposes and links the cluster information we have on buildings to each morphotope. The resulting geometries are intended to be displayed on the interactive map.

In [1]:
import os

import geopandas as gpd
import pandas as pd
import numpy as np
from libpysal import graph
from tqdm.auto import tqdm

In [2]:
# Input directory holding the per-region `tessellation_{region_id}.parquet` files.
folder = "/data/uscuni-ulce/processed_data/tessellations/"
# Cluster data directory; not used by the cells visible in this section.
clusters_folder = "/data/uscuni-ulce/processed_data/clusters/"
# Output root; results are written into a `{v}` subdirectory of this folder.
morphotope_folder = "/data/uscuni-ulce/processed_data/morphotope_clusters/"

In [3]:
# Output version tag: names the subdirectory of `morphotope_folder` that results are written to.
v = "v10"

In [4]:
# One-off setup, kept commented out: run manually when the output root does not exist yet.
# os.mkdir(morphotope_folder)

In [5]:
# Region table, presumably one hull geometry per region (going by the file name).
# Its index supplies the region ids that are processed further down.
# `regions` and `region_hulls` are two names bound to the same GeoDataFrame object.
regions = region_hulls = gpd.read_parquet("/data/uscuni-ulce/regions/cadastre_regions_hull.parquet")

In [6]:
# Suffix appended to the morphotope-label file name read in `generate_morphotope_geometry`.
model_params = '_post_processing_v1'

In [9]:
def generate_morphotope_geometry(region_id):
    """Dissolve one region's tessellation cells into morphotope polygons and save them.

    Cells that share a morphotope label are dissolved into a single geometry.
    Cells whose label contains "-1" are treated as noise; this includes cells
    without any label, which are filled with ``'-1_-1'``. Noise cells are
    dissolved per connected component of a fuzzy-contiguity graph and indexed
    as ``"<component>_-1"``. Both sets are buffered by 0.01, concatenated and
    written to ``{morphotope_folder}{v}/{region_id}_clusters.pq``.

    Parameters
    ----------
    region_id
        Region identifier used to build the input and output file names.

    Returns
    -------
    None
        The result is only written to disk.
    """
    # read data
    etcs = gpd.read_parquet(f'{folder}tessellation_{region_id}.parquet')
    labels = pd.read_parquet(
        f'/data/uscuni-ulce/processed_data/morphotopes/tessellation_labels_morphotopes_{region_id}{model_params}.pq'
    ).morphotope_label
    # Column assignment aligns on the index; cells missing from `labels` become
    # NaN and are turned into the noise label below.
    etcs['label'] = labels
    etcs.label = etcs.label.fillna('-1_-1')

    # Compute the noise mask once and reuse it for both subsets.
    is_noise = etcs["label"].str.contains("-1")

    # dissolve the labelled tessellation cells into morphotopes
    morph_geometries = etcs[~is_noise].dissolve("label")
    # NOTE(review): the tiny buffer presumably closes slivers left by the dissolve - confirm.
    morph_geometries.geometry = morph_geometries.buffer(.01)

    # dissolve noise etcs into contiguous geometries
    noise = etcs[is_noise]
    contig = graph.Graph.build_fuzzy_contiguity(noise, buffer=0.1)
    noise_morphotopes = noise.dissolve(contig.component_labels)
    noise_morphotopes.geometry = noise_morphotopes.buffer(.01)

    # Re-index the noise polygons as "<component>_-1" so they stay recognisable
    # as noise next to the real morphotope labels.
    noise_morphotopes = noise_morphotopes.set_index(
        noise_morphotopes.index.astype(str) + '_-1'
    )
    complete_morphotopes = pd.concat([morph_geometries, noise_morphotopes])

    # Make sure the versioned output directory exists. exist_ok=True keeps this
    # safe when several parallel workers reach this point at the same time.
    out_dir = f"{morphotope_folder}{v}"
    os.makedirs(out_dir, exist_ok=True)
    complete_morphotopes.to_parquet(f"{out_dir}/{region_id}_clusters.pq")

In [11]:
%%time

from joblib import Parallel, delayed

# -1 asks joblib to use all available cores.
n_jobs = -1

# One task per region. Only the index value (the region id) is needed, so
# iterate the index directly instead of building every row with iterrows().
# The worker function has no return statement, so `new` is a list of None.
new = Parallel(n_jobs=n_jobs)(
    delayed(generate_morphotope_geometry)(region_id) for region_id in regions.index
)



CPU times: user 3.67 s, sys: 1.9 s, total: 5.58 s
Wall time: 17min 35s
