In [15]:
import geopandas as gpd
import pandas as pd
import numpy as np
from scipy.spatial import KDTree
from sklearn.preprocessing import QuantileTransformer, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from libpysal.graph import read_parquet

In [16]:
# Directories with the processed inputs/outputs used throughout this notebook.
regions_datadir = "/data/uscuni-ulce/"
clusters_dir = "/data/uscuni-ulce/processed_data/clusters/"
chars_dir = "/data/uscuni-ulce/processed_data/chars/"
graph_dir = "/data/uscuni-ulce/processed_data/neigh_graphs/"
morphotopes_dir = "/data/uscuni-ulce/processed_data/morphotopes/"

# Region table; its index provides the region ids iterated over by the
# parallel processing cells further down.
region_hulls = gpd.read_parquet(
    f"{regions_datadir}regions/cadastre_regions_hull.parquet"
)

In [17]:
# Single region used for the one-off test run of step 2 and for the plots below.
region_id = 69333

In [18]:
# File-name suffix of the morphotope label files that step 1 reads as input.
input_model_params = '_75_0_None_None_False'

In [19]:
# File-name suffix used for the post-processed labels and morphotope data written below.
output_model_params = '_post_processing_v1'

# 1. Change morphotope boundaries so that every connected group of adjacent buildings is assigned to a single morphotope: the group's most common (non-noise) one.

In [125]:
def post_process_morphotope_labels(region_id, input_model_params, output_model_params):
    """Give every connected component of the building graph one morphotope label.

    Reads the building graph and the morphotope labels of a region, picks one
    label per connected component of the graph and writes the relabelled
    series to a new parquet file.

    Parameters
    ----------
    region_id
        Region identifier used in the input and output file names.
    input_model_params : str
        File-name suffix of the label file to read.
    output_model_params : str
        File-name suffix of the label file to write.

    Returns
    -------
    None
        The result is only written to disk.
    """

    def _dominant_label(labels):
        # Labels ending in '-1' are treated as noise: prefer the most common
        # non-noise label and only fall back to the most common label overall
        # when the whole component is noise.
        candidates = labels[~labels.str.endswith('-1')]
        if not candidates.shape[0]:
            candidates = labels
        return pd.Series.mode(candidates)[0]

    ## read data
    building_graph = read_parquet(graph_dir + f"building_graph_{region_id}.parquet")
    labels_path = f'{morphotopes_dir}tessellation_labels_morphotopes_{region_id}{input_model_params}.pq'
    original_labels = pd.read_parquet(labels_path).morphotope_label
    # keep only the ids present in the graph, in graph order
    original_labels = original_labels.loc[building_graph.unique_ids]

    # one label per connected component, then broadcast back to every member
    components = building_graph.component_labels
    label_per_component = original_labels.groupby(components).agg(_dominant_label)
    relabelled = components.map(label_per_component.to_dict())
    relabelled.name = 'morphotope_label'

    # the output must line up one-to-one with the input labels
    assert (relabelled.index == original_labels.index).all()

    relabelled.to_frame().to_parquet(
        f'{morphotopes_dir}tessellation_labels_morphotopes_{region_id}{output_model_params}.pq'
    )

In [126]:
%%time
from joblib import Parallel, delayed
n_jobs = -1
new = Parallel(n_jobs=n_jobs)(
    delayed(post_process_morphotope_labels)(region_id, input_model_params, output_model_params) for region_id, _ in region_hulls.iterrows()
)

CPU times: user 2.77 s, sys: 1.55 s, total: 4.32 s
Wall time: 15min 3s


# 2. Generate new morphotope data based on the new morphotope boundaries

In [23]:
def percentile(n):
    """Build an aggregation function returning the ``n``-th percentile.

    The returned callable is named ``percentile_<n>`` so that it gets a
    distinct, readable label when passed in a list of aggregations.
    """
    def _nth_percentile(values):
        return np.percentile(values, n)

    _nth_percentile.__name__ = 'percentile_%s' % n
    return _nth_percentile

def post_process_morphotope_data(region_id, input_model_params, output_model_params):
    """Recompute per-morphotope summary statistics from the post-processed labels.

    Parameters
    ----------
    region_id
        Region identifier used in the input and output file names.
    input_model_params : str
        Not used by this function; kept so the signature matches
        ``post_process_morphotope_labels``.
    output_model_params : str
        File-name suffix of the label file to read and of the data file to
        write. The output name inserts an extra ``_`` before this suffix.

    Returns
    -------
    None
        The result is only written to disk.
    """
    ## read data
    labels_path = f'{morphotopes_dir}tessellation_labels_morphotopes_{region_id}{output_model_params}.pq'
    new_morphotope_labels = pd.read_parquet(labels_path).morphotope_label
    primary_chars = pd.read_parquet(chars_dir + f'primary_chars_{region_id}.parquet')

    # restrict to the labelled rows and group them by morphotope label
    by_morphotope = primary_chars.loc[new_morphotope_labels.index].groupby(
        new_morphotope_labels.values
    )

    # get morphotope stats for every column
    statistics = [percentile(25), 'median', percentile(75), 'std', 'mean']
    morphotope_data = by_morphotope.agg(statistics)

    # save sizes for clustering
    morphotope_data[('Size', 'Size')] = by_morphotope.size()

    # store morphotopes data
    morphotope_data.to_parquet(morphotopes_dir + f'data_morphotopes_{region_id}_{output_model_params}.pq')

In [24]:
%%time
post_process_morphotope_data(region_id, input_model_params, output_model_params)

In [25]:
%%time
from joblib import Parallel, delayed
n_jobs = -1
new = Parallel(n_jobs=n_jobs)(
    delayed(post_process_morphotope_data)(region_id, input_model_params, output_model_params) for region_id, _ in region_hulls.iterrows()
)



CPU times: user 3.12 s, sys: 1.37 s, total: 4.49 s
Wall time: 18min 25s


### Plotting

In [110]:

from lonboard import SolidPolygonLayer, Map
from lonboard.basemap import CartoBasemap
from lonboard.colormap import apply_categorical_cmap
from palettable.colorbrewer.qualitative import Set3_12
from core.cluster_validation import get_color

In [111]:
# Building geometries of the test region, used as the base layer for the map.
buildings = gpd.read_parquet(
    f"/data/uscuni-ulce/processed_data/buildings/buildings_{region_id}.parquet"
)

In [112]:
# The graph and both label series only exist as locals inside
# `post_process_morphotope_labels` (which also runs in worker processes), so
# they are not available in the notebook namespace. Reload them from disk for
# the test region so this cell works on a fresh kernel.
# Assumes step 1 has already written the output labels for `region_id`.
bq1 = read_parquet(graph_dir + f"building_graph_{region_id}.parquet")
region_morphotope_labels = pd.read_parquet(
    f'{morphotopes_dir}tessellation_labels_morphotopes_{region_id}{input_model_params}.pq'
).morphotope_label.loc[bq1.unique_ids]
aggregated_morphotope_labels = pd.read_parquet(
    f'{morphotopes_dir}tessellation_labels_morphotopes_{region_id}{output_model_params}.pq'
).morphotope_label

# Column assignment aligns each series with `buildings` on the index.
buildings['morph'] = region_morphotope_labels  # labels before post-processing
buildings['new_morph'] = aggregated_morphotope_labels  # labels after post-processing
buildings['component_label'] = bq1.component_labels  # connected component of each building

In [113]:
# Polygon layer carrying the old label, the new label and the component id of
# every building.
layer = SolidPolygonLayer.from_geopandas(
    gdf=buildings[["geometry", "morph", "new_morph", "component_label"]],
    opacity=0.15,
)



In [114]:
# Put the building layer on a Positron basemap; the bare `m` displays the map as the cell output.
m = Map(layer, basemap_style=CartoBasemap.Positron)
m

Map(basemap_style=<CartoBasemap.Positron: 'https://basemaps.cartocdn.com/gl/positron-gl-style/style.json'>, la…

In [115]:
# Encode the post-processed labels as integer codes (`factors`); `idx` holds the unique labels.
factors, idx = buildings['new_morph'].factorize()

In [116]:
# Colour the buildings by their post-processed morphotope code.
# NOTE(review): `get_color` is a project helper; presumably it maps integer codes to colours, confirm in core.cluster_validation.
layer.get_fill_color = get_color(factors)