# Generate morphotope geometry with cluster information

Dissolves enclosed tessellation cells (ETCs) into morphotopes for visualisation purposes and links the cluster information we have on buildings to each morphotope. The resulting geometries are displayed on the interactive map.

In [1]:
import os

import geopandas as gpd
import pandas as pd
import numpy as np
from libpysal import graph
from tqdm.auto import tqdm

In [2]:
# Input: per-region tessellation cells (tessellation_<region_id>.parquet).
folder = "/data/uscuni-ulce/processed_data/tessellations/"
# Input: per-region cluster files (clusters_<region_id>_<v>.pq).
clusters_folder = "/data/uscuni-ulce/processed_data/clusters/"
# Output: dissolved morphotope geometries with cluster columns (<region_id>_clusters.pq).
morphotope_folder = "/data/uscuni-ulce/processed_data/morphotope_clusters/"

In [3]:
# Version suffix of the cluster files to read (clusters_<region_id>_<v>.pq).
v = "v7"

In [4]:
# os.mkdir(morphotope_folder)

In [33]:
# Region hulls; the index of this frame supplies the region ids iterated over in this notebook.
# Both names refer to the same object.
regions = region_hulls = gpd.read_parquet("/data/uscuni-ulce/regions/cadastre_regions_hull.parquet")

In [6]:
# Suffix of the morphotope label files read, and of the geometry files written and
# re-read, by the model_params-based cells below.
model_params = '_post_processing_v1'

In [12]:
def morphotopes_to_etcs(region_id, etcs=True, model_params='_100_0_None_None_False'):
    """Attach morphotope labels to the tessellation cells or buildings of one region.

    Parameters
    ----------
    region_id
        Identifier interpolated into the input file names.
    etcs : bool, default True
        If truthy, the region's tessellation file is read; otherwise its
        buildings file is read.
    model_params : str
        Suffix of the morphotope label file to read.

    Returns
    -------
    geopandas.GeoDataFrame
        The geometries that were read, with two added columns: ``label``
        (integer code of the morphotope, ``-1`` for rows absent from the label
        file) and ``morph`` (``str(region_id) + '_'`` followed by the
        morphotope label, or by ``-1`` for rows absent from the label file).
    """
    if etcs:
        geometries = gpd.read_parquet(f'/data/uscuni-ulce/processed_data/tessellations/tessellation_{region_id}.parquet')
    else:
        geometries = gpd.read_parquet(f'/data/uscuni-ulce/processed_data/buildings/buildings_{region_id}.parquet')

    # every row starts out unlabelled
    geometries['label'] = -1

    label_table = pd.read_parquet(f'/data/uscuni-ulce/processed_data/morphotopes/tessellation_labels_morphotopes_{region_id}{model_params}.pq')
    # expose the first column under a fixed name
    label_table.loc[:, 'morphotope_label'] = label_table.values[:, 0]

    # the position of each distinct label in np.unique's output is its integer code
    distinct_labels = np.unique(label_table.values)
    code_of = pd.Series(np.arange(distinct_labels.shape[0]), distinct_labels)
    geometries.loc[label_table.index, 'label'] = label_table.map(lambda value: code_of.loc[value]).values

    # region-prefixed label; rows without an entry keep the "<region_id>_-1" default
    prefix = str(region_id) + '_'
    geometries['morph'] = prefix + '-1'
    geometries.loc[label_table.index, 'morph'] = prefix + label_table.values
    return geometries

In [13]:
# Region used for the single-region trial run of generate_morphotope_geometry below.
region_id = 69333

In [64]:
def generate_morphotope_geometry(region_id):
    """Dissolve one region's tessellation cells into morphotope polygons and save them.

    Cells are grouped by their morphotope label. Cells that have no label, or
    whose label contains "-1", are treated as noise and merged into contiguous
    patches instead. The suffix of the label file and of the output file is
    taken from the notebook-level ``model_params``.

    Parameters
    ----------
    region_id
        Identifier interpolated into the input and output file names.

    Returns
    -------
    None
        The combined geometries are written to a parquet file.
    """
    # read data
    cells = gpd.read_parquet(f'/data/uscuni-ulce/processed_data/tessellations/tessellation_{region_id}.parquet')
    label_table = pd.read_parquet(f'/data/uscuni-ulce/processed_data/morphotopes/tessellation_labels_morphotopes_{region_id}{model_params}.pq')
    # index-aligned assignment; cells missing from the label file get the noise label
    cells['label'] = label_table.morphotope_label
    cells['label'] = cells['label'].fillna('-1_-1')

    is_noise = cells["label"].str.contains("-1")

    # dissolve labelled tessellation cells into one geometry per morphotope
    morph_geometries = cells[~is_noise].dissolve("label")
    morph_geometries.geometry = morph_geometries.buffer(.01)

    # dissolve noise cells into contiguous geometries
    noise = cells[is_noise]
    contig = graph.Graph.build_fuzzy_contiguity(noise, buffer=0.1)
    noise_morphotopes = noise.dissolve(contig.component_labels)
    noise_morphotopes.geometry = noise_morphotopes.buffer(.01)

    # noise patches are indexed "<component label>_-1"
    noise_ids = noise_morphotopes.index.astype(str) + '_-1'
    complete_morphotopes = pd.concat([morph_geometries, noise_morphotopes.set_index(noise_ids)])
    complete_morphotopes.to_parquet(f'/data/uscuni-ulce/processed_data/morphtope_geometries/geoms_{region_id}{model_params}_clusters.pq')

In [65]:
%%time
# Timed trial run on the single region stored in `region_id`.
generate_morphotope_geometry(region_id)

CPU times: user 38.7 s, sys: 668 ms, total: 39.3 s
Wall time: 39.3 s


In [66]:
%%time

from joblib import Parallel, delayed
n_jobs = -1
new = Parallel(n_jobs=n_jobs)(
    delayed(generate_morphotope_geometry)(region_id) for region_id, _ in regions.iterrows()
)



CPU times: user 4.66 s, sys: 5.6 s, total: 10.3 s
Wall time: 20min 53s


In [18]:
# read all morphotope geometries
geoms = []
for region_id in regions.index:
    region_morphotopes = gpd.read_parquet(f'/data/uscuni-ulce/processed_data/morphtope_geometries/geoms_{region_id}{model_params}_clusters.pq')
    region_morphotopes = region_morphotopes[region_morphotopes.label.isna()]
    region_morphotopes.index = str(region_id) + '_' + region_morphotopes.index
    geoms.append(region_morphotopes)

morphotopes = pd.concat(geoms)

In [23]:
%%time
# Contiguity graph between the morphotopes of all regions, built with a buffer of 0.1.
contig = graph.Graph.build_fuzzy_contiguity(morphotopes, buffer=0.1)

CPU times: user 5min 3s, sys: 7.92 s, total: 5min 11s
Wall time: 5min 11s


In [None]:
# Add self-weights to the graph (returns a new Graph, hence the re-assignment).
contig = contig.assign_self_weight()

In [25]:
# contig.to_parquet('/data/uscuni-ulce/processed_data/morphtope_geometries/graph.pq')

In [64]:
# Folder holding the per-region morphotope character tables (morph_chars_<region_id>.pq).
morphotopes_dir = '/data/uscuni-ulce/processed_data/morphotopes/'

def read_morphotopes_chars():
    """Load the morphotope character tables of all regions into one DataFrame.

    Reads ``morph_chars_<region_id>.pq`` from ``morphotopes_dir`` for every
    region id in the index of ``region_hulls`` and concatenates the tables in
    that order.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all per-region tables.
    """
    # Only the region ids are needed, so iterate the index rather than
    # materialising each row with iterrows().
    per_region = [
        pd.read_parquet(f'{morphotopes_dir}morph_chars_{region_id}.pq')
        for region_id in region_hulls.index
    ]
    return pd.concat(per_region)

In [65]:
morph_chars_data = read_morphotopes_chars()

In [66]:
# Load the saved morphotope contiguity graph. The Graph reader lives in
# libpysal.graph (imported as `graph`); a bare `read_parquet` is not defined here.
contig = graph.read_parquet('/data/uscuni-ulce/processed_data/morphtope_geometries/graph.pq')
# add self-weights so the lag of a morphotope also includes its own value
contig = contig.assign_self_weight()

# area and perimeter, ordered like the graph's ids
connected_area = morph_chars_data['limAre'].loc[contig.unique_ids]
connected_per = morph_chars_data['limPer'].loc[contig.unique_ids]

# spatial lag of area and perimeter over the contiguity graph
res_area = contig.lag(connected_area)
res_perim = contig.lag(connected_per)

# Store the lagged values, aligned on the graph ids. The previous version
# stored the un-lagged inputs here, so the *_lag columns merely duplicated
# limAre / limPer.
morph_chars_data['limAre_lag'] = pd.Series(res_area, index = contig.unique_ids)
morph_chars_data['limPer_lag'] = pd.Series(res_perim, index = contig.unique_ids)

# the graph is no longer needed; drop the reference
del contig

Unnamed: 0_level_0,limMFR,limMTC,limLMFR,limLPS,limAre,limPer,limLAL,limAre_min,limPer_min,limLAL_min
morph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
4_0_0,0.892466,0.872093,0.970569,1.0,614.044020,314.022821,114.156656,139.115433,145.378683,51.441497
4_0_1,0.418164,0.846847,0.402235,1.0,61033.640680,5198.670759,1510.251963,112.460908,139.004412,50.386449
4_0_10,0.596141,0.800000,0.557893,1.0,20246.639736,2552.897249,770.168562,155.044988,161.727346,56.688064
4_0_100,0.914915,0.847458,0.956665,1.0,3750.118216,1439.972862,406.897033,275.980107,217.070679,79.450764
4_0_101,0.905944,0.867925,0.900318,1.0,4898.520489,1836.033362,461.959483,306.247418,254.026300,88.405634
...,...,...,...,...,...,...,...,...,...,...
152511_809_0,0.852618,0.833333,0.853045,1.0,2635.238937,740.117223,256.057881,163.541837,162.753077,58.160159
152511_8_0,0.932091,0.896774,0.940105,1.0,3297.389888,1050.999677,353.692176,228.043315,193.571876,69.613051
152511_8_1,0.860286,0.821053,0.869738,1.0,4457.811003,1177.861708,392.034372,201.021874,181.386676,65.108655
152511_8_2,0.903059,0.879195,0.912246,1.0,2557.651710,825.129001,302.090125,207.655549,185.992686,66.850173


In [28]:
contig.lag?

[0;31mSignature:[0m [0mcontig[0m[0;34m.[0m[0mlag[0m[0;34m([0m[0my[0m[0;34m,[0m [0mcategorical[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m [0mties[0m[0;34m=[0m[0;34m'raise'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Spatial lag operator

Constructs spatial lag based on neighbor relations of the graph.


Parameters
----------
y : array
    numpy array with dimensionality conforming to w
categorical : bool
    True if y is categorical, False if y is continuous.
ties : {'raise', 'random', 'tryself'}, optional
    Policy on how to break ties when a focal unit has multiple
    modes for a categorical lag.
    - 'raise': This will raise an exception if ties are
    encountered to alert the user (Default).
    - 'random': modal label ties Will be broken randomly.
    - 'tryself': check if focal label breaks the tie between label
    modes.  If the focal label does not break the modal tie, the
    tie will be be broken randomly. If the focal unit has a
 

In [10]:
def generate_morphotope_geometry(region_id):
    """Dissolve one region's tessellation cells into morphotopes with cluster columns and save them.

    The tessellation cells are inner-joined with the "morph", "final",
    "regional" and "final_without_noise" columns of the region's cluster file.
    Cells whose "morph" value contains "-1" are treated as noise and merged
    into contiguous patches; all other cells are dissolved per "morph" value.
    The result is written to ``{morphotope_folder}{region_id}_clusters.pq``.

    This definition replaces the earlier function of the same name in this
    notebook.

    Parameters
    ----------
    region_id
        Identifier interpolated into the input and output file names.

    Returns
    -------
    None
    """
    cluster_columns = ["morph", "final", "regional", "final_without_noise"]
    tess = gpd.read_parquet(f"{folder}tessellation_{region_id}.parquet")
    blg = gpd.read_parquet(f"{clusters_folder}clusters_{region_id}_{v}.pq")
    tess_w_data = tess.join(blg[cluster_columns], how="inner")

    is_noise = tess_w_data["morph"].str.contains("-1")

    # one geometry per morphotope
    morphotopes = tess_w_data[~is_noise].dissolve("morph")
    morphotopes.geometry = morphotopes.buffer(.01)

    # noise cells are merged into contiguous patches
    noise = tess_w_data[is_noise]
    contig = graph.Graph.build_fuzzy_contiguity(noise, buffer=0.1)
    noise_morphotopes = noise.dissolve(contig.component_labels)
    noise_morphotopes.geometry = noise_morphotopes.buffer(.01)

    # NOTE(review): noise patches are indexed by the "morph" value kept by the
    # dissolve, so several patches may share one index value - confirm this is intended.
    noise_by_morph = noise_morphotopes.set_index("morph")
    complete_morphotopes = pd.concat([morphotopes, noise_by_morph])

    complete_morphotopes.to_parquet(f"{morphotope_folder}{region_id}_clusters.pq")

In [11]:
# Generate and save the morphotope geometries of every region, walking the
# region index in reverse order with a progress bar.
for region in tqdm(regions.index[::-1]):
    generate_morphotope_geometry(region)

  0%|          | 0/847 [00:00<?, ?it/s]