# Generate morphotope-level attributes

In [2]:
import geopandas as gpd
import pandas as pd
import numpy as np
from libpysal.graph import read_parquet, Graph
from shapely import unary_union
import momepy as mm
from sklearn.preprocessing import StandardScaler

from shapely import get_coordinates
from scipy.cluster.hierarchy import single
from scipy.spatial.distance import pdist

In [3]:
country = "belgium"

In [4]:
# Suffix of the morphotope label file; passed to morphotopes_to_etcs by add_morph_chars.
model_params = "_post_processing_v1"
# NOTE(review): not referenced by any code visible in this notebook.
buildings_dir = "/data/uscuni-ulce/processed_data/buildings/"
# Directory holding the per-region building graphs (building_graph_{region_id}.parquet).
graph_dir = "/data/uscuni-ulce/processed_data/neigh_graphs/"
# Directory holding the per-region morphotope label files.
morph_dir = "/data/uscuni-ulce/processed_data/morphotopes/"

In [5]:
# Region hulls of the selected country; the frame's index supplies the region ids.
regions_datadir = "/data/uscuni-ulce/"
region_hulls = gpd.read_parquet(
    f"{regions_datadir}regions/{country}_regions_hull.parquet"
)

In [6]:
# Region used by the single-region cells below; the commented lines are
# alternative region ids kept for quick switching.
# region_id = 139196 # prague

region_id = 69333


# region_id = 8707
# region_id = 5883 # freiburg
# region_id = 86873
# region_id = 38679
# region_id = 55763
# region_id = 107131
# region_id = 99886
# region_id = 151676 # vilnius

# region_id= 66593

## Parallel processing

In [6]:
def morphotopes_to_etcs(region_id, etcs=True, model_params="_100_0_None_None_False"):
    """Attach morphotope labels to the tessellation cells or buildings of a region.

    Parameters
    ----------
    region_id : int or str
        Region identifier used to build the input file names.
    etcs : bool, default True
        If truthy, the region's tessellation file is loaded; otherwise its
        buildings file.
    model_params : str
        Suffix of the morphotope label file to read.

    Returns
    -------
    geopandas.GeoDataFrame
        The loaded elements with two added columns:

        - ``label``: integer code of the element's morphotope (position of its
          label among the sorted unique labels), ``-1`` for elements that have
          no row in the label file;
        - ``morph``: ``"{region_id}_{label}"``, or ``"{region_id}_-1"`` for
          elements that have no row in the label file.
    """
    if etcs:
        elements = gpd.read_parquet(
            f"/data/uscuni-ulce/processed_data/tessellations/tessellation_{region_id}.parquet"
        )
    else:
        elements = gpd.read_parquet(
            f"/data/uscuni-ulce/processed_data/buildings/buildings_{region_id}.parquet"
        )

    morphotopes = pd.read_parquet(
        f"/data/uscuni-ulce/processed_data/morphotopes/tessellation_labels_morphotopes_{region_id}{model_params}.pq"
    )
    # The first column carries the morphotope label. Working on a 1-D array keeps
    # the assignments below valid regardless of how many columns the file has.
    raw_labels = morphotopes.iloc[:, 0].to_numpy(dtype=object)

    # Integer code = rank of the label among the sorted unique labels.
    _, codes = np.unique(raw_labels, return_inverse=True)

    elements["label"] = -1
    elements.loc[morphotopes.index, "label"] = codes

    elements["morph"] = f"{region_id}_-1"
    elements.loc[morphotopes.index, "morph"] = f"{region_id}_" + raw_labels
    return elements


def _rng_edge_std(points):
    """Standard deviation of the edge weights of the relative-neighbourhood graph of ``points``."""
    tri = Graph.build_triangulation(
        points,
        method="relative_neighborhood",
        kernel="identity",
    )
    return tri.adjacency.std()


def _top_sum(values, k):
    """Sum of the ``k`` largest entries of ``values`` (selected by position after sorting)."""
    return values.sort_values(ascending=True).iloc[-k:].sum()


def generate_ratio(group, buildings_q1, limit_value=10):
    """Compute morphotope-level characters for the buildings of one morphotope.

    Buildings that are connected in ``buildings_q1`` are first merged into
    single geometries ("connected structures"); all characters are derived
    from those structures.

    Parameters
    ----------
    group : geopandas.GeoDataFrame
        Buildings of one morphotope. Needs a geometry column and ``libNCo``.
    buildings_q1 : libpysal.graph.Graph
        Building graph; its subgraph on ``group.index`` defines which buildings
        are merged together (via ``component_labels``).
    limit_value : int, default 10
        Number of largest structures summed for ``limAre``, ``limPer`` and
        ``limLAL``.

    Returns
    -------
    pandas.Series
        ``limMFR``  share of total structure area held by structures with
                    ``facade_ratio < 8`` and ``elongation < 0.90``;
        ``limMTC``  share of structures (by count) meeting that condition;
        ``limLMFR`` same area share, restricted to structures whose area is
                    above the median. NaN when no structure is strictly above
                    the median (e.g. a single structure), because the
                    denominator is then 0;
        ``limLPS``  1 if ``limMFR > 0.4`` or the median ``libNCo`` of the group
                    is positive, else 0;
        ``limAre``, ``limPer``, ``limLAL``  sums of the ``limit_value`` largest
                    areas, perimeters and longest-axis lengths;
        ``limSDi``  standard deviation of relative-neighbourhood-graph edge
                    weights divided by the median structure perimeter
                    (``0.35`` for groups with fewer than 3 buildings).
    """
    group_graph = buildings_q1.subgraph(group.index)
    connected_buildings = group.geometry.groupby(group_graph.component_labels).apply(
        lambda geoms: unary_union(geoms.values)
    )
    n_structures = connected_buildings.shape[0]
    libNCos = group.libNCo.median()

    areas = connected_buildings.area
    perimeters = connected_buildings.length
    elongation = mm.elongation(connected_buildings)
    fr = mm.facade_ratio(connected_buildings)
    thin = (fr < 8) & (elongation < 0.90)

    morph_fr_area_ratio = areas[thin].sum() / areas.sum()
    morph_fr_count_ratio = (
        connected_buildings[thin].count() / connected_buildings.count()
    )

    largest = areas > areas.median()
    # 0 / 0 (NaN under numpy float semantics) when nothing exceeds the median.
    largest_morph_fr_area_ratio = areas[thin & largest].sum() / areas[largest].sum()

    limLPS = int((morph_fr_area_ratio > 0.4) or (libNCos > 0))

    # Spread of distances between structures, scaled by the median perimeter.
    if n_structures > 3:
        val = _rng_edge_std(connected_buildings.representative_point()) / perimeters.median()
    elif group.shape[0] < 3:
        # Region 99886 has an isolated building that gets treated as a
        # morphotope; too few points to triangulate, so use a fixed value.
        val = 0.35
    else:
        # Too few merged structures: triangulate the individual buildings instead.
        val = _rng_edge_std(group.representative_point()) / perimeters.median()

    top_k = min(limit_value, n_structures)

    return pd.Series(
        {
            "limMFR": morph_fr_area_ratio,
            "limMTC": morph_fr_count_ratio,
            "limLMFR": largest_morph_fr_area_ratio,
            "limLPS": limLPS,
            "limAre": _top_sum(areas, top_k),
            "limPer": _top_sum(perimeters, top_k),
            "limLAL": _top_sum(mm.longest_axis_length(connected_buildings), top_k),
            "limSDi": val,
        }
    )

In [8]:
# NOTE(review): the first assignment is overwritten immediately, and the city tags
# below disagree with the region-selection cell near the top of the notebook
# (139196 is tagged prague and 151676 vilnius there) — confirm which is right.
region_id = 99886  # tagged "vilnius" — TODO confirm
region_id = 69333  # tagged "prague" — TODO confirm; same id as the selection cell

In [7]:
def add_morph_chars(region_id):
    """Compute morphotope-level characters for one region and write them to disk.

    Reads the module-level ``model_params`` (label-file suffix) and
    ``graph_dir`` (building-graph directory). Buildings whose element is
    assigned to a morphotope are grouped by morphotope id and summarised with
    ``generate_ratio``; the result is written to
    ``morph_chars_{region_id}.pq``.

    Parameters
    ----------
    region_id : int or str
        Region identifier used to build all file names.

    Returns
    -------
    None
    """
    etcs = morphotopes_to_etcs(region_id, model_params=model_params)
    buildings = gpd.read_parquet(
        f"/data/uscuni-ulce/processed_data/chars/buildings_chars_{region_id}.parquet"
    )

    # Only the index of assigned elements is needed here, so the tessellation
    # is not reprojected.
    assigned = etcs.index[etcs["morph"].str.split("_").str[-1] != "-1"]
    # assign() returns a new frame rather than writing into a .loc slice;
    # the morph ids are aligned on the index.
    buildings = buildings.loc[assigned].assign(morph=etcs["morph"])

    buildings_q1 = read_parquet(graph_dir + f"building_graph_{region_id}.parquet")
    res = buildings.groupby("morph").apply(generate_ratio, buildings_q1)

    res.to_parquet(
        f"/data/uscuni-ulce/processed_data/morphotopes/morph_chars_{region_id}.pq"
    )

In [10]:
# add_morph_chars(69333)

In [12]:
# region_hulls = region_hulls.loc[[region_id]]

In [9]:
%%time
from joblib import Parallel, delayed

# Run add_morph_chars once per region. Region ids are the index labels of
# region_hulls, so iterate over the index directly instead of building every
# row with iterrows().
n_jobs = -1
new = Parallel(n_jobs=n_jobs)(
    delayed(add_morph_chars)(region_id) for region_id in region_hulls.index
)



CPU times: user 1.88 s, sys: 1.05 s, total: 2.93 s
Wall time: 14min 4s


# Experiments

In [14]:
# Per-region character tables for the experiments below.
# etcs and tess come from the same file, so read it once and copy.
tess = gpd.read_parquet(
    f"/data/uscuni-ulce/processed_data/chars/tessellations_chars_{region_id}.parquet"
)
etcs = tess.copy()
buildings = gpd.read_parquet(
    f"/data/uscuni-ulce/processed_data/chars/buildings_chars_{region_id}.parquet"
)
streets = gpd.read_parquet(
    f"/data/uscuni-ulce/processed_data/chars/streets_chars_{region_id}.parquet"
)
nodes = gpd.read_parquet(
    f"/data/uscuni-ulce/processed_data/chars/nodes_chars_{region_id}.parquet"
)
primary = pd.read_parquet(
    f"/data/uscuni-ulce/processed_data/chars/primary_chars_{region_id}.parquet"
)

In [15]:
# Same preparation steps as add_morph_chars, run inline for the selected region.
etcs = morphotopes_to_etcs(region_id, model_params=model_params)
buildings = gpd.read_parquet(
    f"/data/uscuni-ulce/processed_data/chars/buildings_chars_{region_id}.parquet"
)
# Keep elements whose morph id does not end in "-1", the value
# morphotopes_to_etcs gives to elements without a morphotope.
morphs = etcs[etcs.morph.str.split("_").str[-1] != "-1"].to_crs(epsg=3035)
# Restrict buildings to those rows and attach the morphotope id (aligned on index).
buildings = buildings.loc[morphs.index]
buildings["morph"] = etcs["morph"]
# Building graph of the region, used to find connected buildings.
buildings_q1 = read_parquet(graph_dir + f"building_graph_{region_id}.parquet")

In [16]:
groups = buildings.groupby("morph")
group = groups.get_group("69333_849_640")

In [17]:
# Merge the buildings of the selected morphotope that are connected in the
# building graph into single geometries.
group_graph = buildings_q1.subgraph(group.index)
libNCos = group.libNCo.median()
connected_buildings = group.geometry.groupby(group_graph.component_labels).apply(
    lambda geoms: unary_union(geoms.values)
)

In [22]:
Graph.build_knn(connected_buildings.representative_point(), k=15)

<Graph of 28 nodes and 420 nonzero edges indexed by
 [0, 1, 2, 3, 4, ...]>

np.float64(63.05373992620591)

In [51]:
### add the connected structure characters
buildings_q1 = read_parquet(graph_dir + f"building_graph_{region_id}.parquet")
clusters = pd.read_parquet(
    f"{morph_dir}tessellation_labels_morphotopes_{region_id}_75_0_None_None_False.pq"
)

In [52]:
buildings = gpd.read_parquet(
    f"/data/uscuni-ulce/processed_data/chars/buildings_chars_{region_id}.parquet"
)
# Pick the label column explicitly: assigning a whole DataFrame to one column
# only works while the label file has exactly one column.
buildings["morph"] = clusters["morphotope_label"]
# Drop buildings whose label ends in "-1".
buildings = buildings[buildings.morph.str.split("_").str[-1] != "-1"]

In [53]:
# buildings = buildings.join(primary.drop(columns=primary.columns[~primary.columns.isin(streets.columns)]))

In [54]:
morph_primary = primary.groupby(clusters.morphotope_label).median()

In [55]:
morphs_to_check = ["849_437", "849_486", "849_530"]

In [56]:
morph_primary.loc[morphs_to_check].style.background_gradient(axis=0, cmap="BuGn")

Unnamed: 0_level_0,sdbAre,sdbPer,sdbCoA,ssbCCo,ssbCor,ssbSqu,ssbERI,ssbElo,ssbCCM,ssbCCD,stbOri,mtbSWR,libNCo,ldbPWL,ltcBuA,mtbAli,mtbNDi,ltbIBD,stbCeA,stbSAl,sdsLen,sssLin,ldsMSL,ldsRea,ldsAre,sisBpM,sdsSPW,sdsSPO,sdsSWD,mtdDeg,lcdMes,linP3W,linP4W,linPDE,lcnClo,lddNDe,linWID,ldsCDL,xcnSCl,mtdMDi,sddAre,midRea,midAre,stcOri,sdcLAL,sdcAre,sscCCo,sscERI,mtcWNe,mdcAre,ltcWRB,sicCAR,stcSAl,ldkAre,ldkPer,lskCCo,lskERI,lskCWA,ltkOri,ltkWNB,likWBB,sdsAre,likWCe,mibCou,mibAre,mibLen,mibElo,mibERI,mibCCo,mibLAL,mibFR,mibSCo,micBAD,licBAD,misBAD,midBAD
morphotope_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1
849_437,859.241907,160.023023,0.0,0.372928,6.5,0.537349,0.892025,0.436748,16.35314,5.096996,13.46304,0.0,0.0,191.824077,1.0,1.692788,42.985776,44.363624,4.385041,2.736864,160.279569,0.999345,141.066275,40.0,325849.94667,0.014791,37.550729,0.678571,2.723619,3.0,0.062096,0.616667,0.173214,0.192308,2e-06,0.006401,0.011654,269.241291,0.0,135.774085,17306.180477,7.0,49321.808451,14.819522,137.098373,6685.951497,0.489169,0.967229,0.019503,61834.842413,2.4e-05,0.144241,3.363489,87015.401871,1446.048639,0.434799,0.859706,354.843976,27.061449,0.005947,0.144052,20464.764032,0.000149,1.0,1262.13017,191.271685,0.343404,0.863177,0.306046,74.51544,5.787116,5.787116,918.999758,2294.365067,113.35145,7.442156
849_486,270.390609,73.599375,0.0,0.489031,4.0,0.315569,0.99956,0.514805,13.285515,0.050556,38.350533,0.0,0.0,92.62662,1.0,1.015337,23.004441,30.99321,4.081491,2.865691,913.478904,0.996533,335.089914,253.0,1177635.379728,0.062148,36.93707,0.917411,5.793012,3.0,0.04878,0.791667,0.083333,0.125,0.0,0.003464,0.006199,1311.491326,0.0,754.804345,263407.949778,81.0,457167.974576,34.987521,79.805855,2317.068809,0.482116,0.982426,0.031781,27075.723908,8e-06,0.142837,6.747334,1282697.063445,10348.304584,0.33624,0.452199,2383.720505,38.490951,0.001643,0.153213,257918.023654,0.000253,1.0,411.302682,91.68452,0.530516,0.999874,0.483515,34.880123,4.537217,4.537217,598.422528,1584.358859,753.772258,1468.646816
849_530,115.402701,44.910727,0.0,0.514366,4.0,0.485372,0.999773,0.522744,8.002523,0.039238,2.542587,0.0,0.0,48.878672,1.0,2.361025,26.980348,31.387949,1.814443,1.390285,481.947861,0.999994,324.409365,250.0,1701981.371189,0.069826,50.0,1.0,2.187555,3.0,0.068966,0.581395,0.166667,0.232558,1e-06,0.003022,0.005213,1025.652356,0.076923,306.914028,127364.296664,47.0,282247.485876,8.331863,98.204769,3464.072867,0.479405,0.987021,0.025488,43314.668052,1.1e-05,0.060025,6.119848,530310.41618,3985.008822,0.381886,0.765244,982.556967,23.237544,0.001757,0.159062,120634.260347,0.000168,1.0,128.558489,47.936573,0.536681,1.002601,0.522347,17.611684,2.769253,2.769253,1069.251795,2841.58294,589.096235,1232.574771


In [57]:
from core.utils import used_keys

used_keys["ldsCDL"]

'local cul-de-sac length of street network'

In [58]:
groups = buildings.groupby("morph")

In [17]:
def generate_ratio(group, buildings_q1):
    """Reduced variant of the ratio characters for one morphotope.

    NOTE: a function with the same name (and more outputs) is defined earlier
    in this notebook; running this cell replaces it in the kernel namespace.

    Buildings of ``group`` that are connected in ``buildings_q1`` are merged
    into single geometries, and four values are returned:

    - ``limMFR``: share of total merged area held by structures with
      ``facade_ratio < 8`` and ``elongation < 0.90``;
    - ``limMTC``: share of structures (by count) meeting that condition;
    - ``limLMFR``: the same area share among structures above the median area;
    - ``limLPS``: 1 if ``limMFR > 0.4`` or the group's median ``libNCo`` is
      positive, else 0.
    """
    component_ids = buildings_q1.subgraph(group.index).component_labels
    structures = group.geometry.groupby(component_ids).apply(
        lambda geoms: unary_union(geoms.values)
    )
    libnco_median = group.libNCo.median()

    structure_area = structures.area
    structure_elongation = mm.elongation(structures)
    structure_fr = mm.facade_ratio(structures)
    is_thin = (structure_fr < 8) & (structure_elongation < 0.90)

    area_share = structure_area[is_thin].sum() / structure_area.sum()
    count_share = structures[is_thin].count() / structures.count()

    above_median = structure_area > structure_area.median()
    large_area_share = (
        structure_area[is_thin & above_median].sum()
        / structure_area[above_median].sum()
    )

    flag = ((area_share > 0.4) | (libnco_median > 0)).astype(int)

    return pd.Series(
        {
            "limMFR": area_share,
            "limMTC": count_share,
            "limLMFR": large_area_share,
            "limLPS": flag,
        }
    )

In [164]:
def generate_ibd_blobs(group, buildings_q1):
    """Describe single-linkage merge distances between connected structures.

    Buildings of ``group`` connected in ``buildings_q1`` are merged; a
    representative point of each merged geometry is clustered with
    single linkage, and ``describe()`` of the linkage distances (third column
    of the linkage matrix) is returned.
    """
    component_ids = buildings_q1.subgraph(group.index).component_labels
    structures = group.geometry.groupby(component_ids).apply(
        lambda geoms: unary_union(geoms.values)
    )

    xy = get_coordinates(structures.representative_point())
    merge_distances = single(pdist(xy))[:, 2]
    return pd.Series(merge_distances).describe()


def generate_shape_variability(group, buildings_q1):
    """Describe single-linkage merge distances in a standardised shape space.

    Each connected structure of the morphotope is represented by its area,
    perimeter and area/perimeter ratio; the three features are standardised,
    clustered with single linkage, and ``describe()`` of the linkage distances
    is returned.
    """
    component_ids = buildings_q1.subgraph(group.index).component_labels
    structures = group.geometry.groupby(component_ids).apply(
        lambda geoms: unary_union(geoms.values)
    )

    area = structures.area
    perimeter = structures.length
    features = pd.DataFrame(
        {"area": area, "perimeter": perimeter, "fr": area / perimeter}
    )

    scaled = StandardScaler().fit_transform(features)
    merge_distances = single(pdist(scaled))[:, 2]
    return pd.Series(merge_distances).describe()


def generate_similarity_variability(group, buildings_q1):
    """Describe single-linkage merge distances between a morphotope's buildings.

    Rows of the module-level ``primary`` table matching ``group.index`` are
    standardised (NaNs replaced by 0 afterwards) and clustered with single
    linkage; ``describe()`` of the linkage distances is returned.
    ``buildings_q1`` is accepted for signature parity and not used.
    """
    scaled = StandardScaler().fit_transform(primary.loc[group.index])
    distances = pdist(np.nan_to_num(scaled))
    merge_distances = single(distances)[:, 2]
    return pd.Series(merge_distances).describe()

In [165]:
res = groups.apply(generate_similarity_variability, buildings_q1)

  res = groups.apply(generate_similarity_variability, buildings_q1)


In [166]:
res

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
morph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1000_0,85.0,5.585475,2.280293,1.963692,3.964885,5.010671,6.914986,15.625255
1005_0,74.0,5.570405,1.777569,1.913207,4.244364,5.328206,6.636810,11.989257
1016_0,223.0,5.096428,2.636045,0.536647,3.776080,4.700878,6.123471,24.393969
1016_1,114.0,6.056521,1.654434,2.044168,5.163466,5.976587,7.098941,10.807373
1016_2,90.0,6.557539,1.900395,1.722121,5.532435,6.504866,7.731026,10.839783
...,...,...,...,...,...,...,...,...
979_0,83.0,5.719634,2.255198,1.681904,4.073253,5.443868,6.874934,11.889941
991_0,110.0,5.693684,1.852669,1.635863,4.459126,5.454597,6.815337,10.778099
991_1,151.0,5.879028,1.901852,2.421977,4.516377,5.832435,6.969282,12.974264
997_0,119.0,5.803146,1.845017,1.342690,4.652741,5.980584,6.719658,12.846944


In [167]:
plotting = buildings[["geometry", "sdbAre"]].copy()
# plotting[col] = primary[col]

In [168]:
# Broadcast each per-morphotope summary statistic onto the buildings of that morphotope.
for stat in ("count", "mean", "std", "min", "25%", "50%", "75%", "max"):
    plotting[stat] = buildings.morph.map(res[stat].to_dict())

In [169]:
# plotting['morph_fr_area_ratio'] = buildings.morph.map(res.limMFR.to_dict())
# plotting['morph_fr_count_ratio'] = buildings.morph.map(res.limMTC.to_dict())
# plotting['morph_micBAD'] = buildings.morph.map(res.limLMFR.to_dict())
# plotting['limLPS'] = buildings.morph.map(res.limLPS.to_dict())

In [170]:
import lonboard

# Polygon layer built from the per-building plotting frame.
layer = lonboard.SolidPolygonLayer.from_geopandas(plotting, opacity=0.7)

from sidecar import Sidecar

# Display the map inside a Sidecar output titled "Morphotope chars".
sc = Sidecar(title="Morphotope chars")
m = lonboard.Map(layer, basemap_style=lonboard.basemap.CartoBasemap.DarkMatter)
with sc:
    display(m)



In [151]:
# res['limLPS'] = ((res['limMFR'] > .4) | (buildings['libNCo'] > 0)).astype(int)

In [152]:
# thin = (buildings.morph.map((morph_primary.sdsLen > 450).astype(int).to_dict())).astype(int)

# thin = (buildings.morph.map((morph_primary.midBAD > 1000).astype(int).to_dict())).astype(int)


# thin = (buildings.morph.map((morph_primary.mibERI > .99).astype(int).to_dict())).astype(int)


In [175]:
plotting["limLPS"] = (plotting["50%"] > 4.5).astype(int)

In [176]:
from core.cluster_validation import get_color

colors = get_color(plotting.limLPS)
layer.get_fill_color = colors

In [21]:
# connected_buildings.explore(column=thin, categorical=True)

In [36]:
col = "midBAD"

In [37]:
primary[col].describe()

count    448001.000000
mean        178.715170
std         631.432349
min           0.000000
25%          40.608968
50%          63.281226
75%         125.032743
max       48330.395936
Name: midBAD, dtype: float64

In [38]:
mean1 = primary[col].mean()
mean1, (primary[col] <= mean1).sum(), (primary[col] > mean1).sum()

(np.float64(178.71516966471), np.int64(366438), np.int64(81563))

In [39]:
mean2 = primary.loc[primary[col] > mean1, col].mean()
mean2, (primary[col] <= mean2).sum(), (primary[col] > mean2).sum()

(np.float64(703.1605884010245), np.int64(429250), np.int64(18751))

In [40]:
mean3 = primary.loc[primary[col] > mean2, col].mean()
mean3, (primary[col] <= mean3).sum(), (primary[col] > mean3).sum()

(np.float64(1919.496400573645), np.int64(443308), np.int64(4693))

In [41]:
mean4 = primary.loc[primary[col] > mean3, col].mean()
mean4, (primary[col] <= mean4).sum(), (primary[col] > mean4).sum()

(np.float64(4461.706616483338), np.int64(446564), np.int64(1437))

In [None]:
# streets.explore()