In [5]:
import glob
import re

import geopandas as gpd
import momepy as mm
import numpy as np
import pandas as pd
import shapely

regions_datadir = "/data/uscuni-ulce/"
data_dir = "/data/uscuni-ulce/processed_data/"
eubucco_files = glob.glob(regions_datadir + "eubucco_raw/*")
graph_dir = data_dir + "neigh_graphs/"
chars_dir = "/data/uscuni-ulce/processed_data/chars/"

In [123]:
# building_region_mapping = pd.read_parquet(regions_datadir + 'regions/' +'id_to_region.parquet', engine='pyarrow')
# counts = building_region_mapping.groupby('region')['id'].size()
# del building_region_mapping
# large_regions = counts[counts > 6e5].index
# large_regions

In [8]:
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

In [9]:
def check_available():
    elements = ["buildings", "enclosures", "tessellations", "nodes", "streets"]
    for el in elements:
        el_ids = [
            int(re.findall(r"\d+", f)[0])
            for f in glob.glob(chars_dir + f"{el}/*.parquet")
        ]
        missing = np.setdiff1d(region_hulls.index.values, el_ids)
        print(f"Missing {el} for regions {missing}")


check_available()

Missing buildings for regions []
Missing enclosures for regions []
Missing tessellations for regions []
Missing nodes for regions []
Missing streets for regions []


In [10]:
# 12199 - hills, small test
# 69300 - prague medium
# 226 - germany somewhere, largest cluster

for region_id, region_hull in region_hulls.iterrows():
    if region_id != 69300:
        continue

    break
region_id

69300

### Merging data

In [11]:
tessellation = gpd.read_parquet(chars_dir + f"tessellations/chars_{region_id}.parquet")
buildings = gpd.read_parquet(chars_dir + f"buildings/chars_{region_id}.parquet")
enclosures = gpd.read_parquet(chars_dir + f"enclosures/chars_{region_id}.parquet")
streets = gpd.read_parquet(chars_dir + f"streets/chars_{region_id}.parquet")
nodes = gpd.read_parquet(chars_dir + f"nodes/chars_{region_id}.parquet")

In [12]:
merged = pd.merge(
    tessellation.drop(columns=["geometry"]),
    buildings.drop(columns=["nodeID", "geometry"]),
    right_index=True,
    left_index=True,
)

merged = merged.merge(
    enclosures.drop(columns="geometry"),
    right_on="eID",
    left_on="enclosure_index",
    how="left",
)

merged = merged.merge(streets.drop(columns="geometry"), on="nID", how="left")
merged = merged.merge(nodes.drop(columns="geometry"), on="nodeID", how="left")

In [13]:
primary = merged.drop(
    columns=[
        "nID",
        "eID",
        "nodeID",
        "mm_len",
        "cdsbool",
        "node_start",
        "node_end",
        "x",
        "y",
        "enclosure_index",
        "id",  ## maybe keep
    ]
)

In [14]:
# primary.to_parquet(chars_dir + f'primary_chars/chars_{region_id}.parquet')

In [11]:
primary = primary.drop(columns=["osm_id"])

### Checks

In [221]:
assert (buildings.index == primary.index).all()

In [222]:
stats = primary.describe()

In [223]:
assert not (
    stats.loc["max"] == stats.loc["min"]
).any()  ### there should be at least some data in any of the columns

In [224]:
for c in tessellation.columns:
    if c not in char_names:
        print(c)

enclosure_index
geometry
ltcWRB
nodeID


In [18]:
%%time
res = mm.buffered_limit(buildings, buffer="adaptive", max_buffer=200)

CPU times: user 25.7 s, sys: 372 ms, total: 26.1 s
Wall time: 26.1 s


In [22]:
polygons = [p for p in res.geoms]

In [26]:
# gpd.GeoDataFrame({'geometry': polygons}, crs=buildings.crs).explore()

In [225]:
# !conda install -c conda-forge lonboard -y
# !conda update -c conda-forge ipywidgets -y
# !conda install -c conda-forge jupyter_contrib_nbextensions -y
# !jupyter nbextension enable --py widgetsnbextension

## all of this then restart jupyterlab

In [227]:
tessellation.columns

Index(['enclosure_index', 'geometry', 'stcOri', 'sdcLAL', 'sdcAre', 'sscCCo',
       'sscERI', 'mtcWNe', 'mdcAre', 'ltcWRB', 'sicCAR', 'stcSAl', 'nodeID'],
      dtype='object')

In [228]:
from lonboard import PolygonLayer
from lonboard.colormap import apply_continuous_cmap
from palettable.colorbrewer.sequential import Oranges_9

In [229]:
# plotting = enclosures.copy()
# plotting['geometry'] = nodes.buffer(5).to_crs('EPSG:4326')


plotting = tessellation.to_crs("EPSG:4326")

In [230]:
# colors = apply_categorical_cmap(plotting_buildings['sdbCoA'], Set3_9)

In [231]:
plotting.columns

Index(['enclosure_index', 'geometry', 'stcOri', 'sdcLAL', 'sdcAre', 'sscCCo',
       'sscERI', 'mtcWNe', 'mdcAre', 'ltcWRB', 'sicCAR', 'stcSAl', 'nodeID'],
      dtype='object')

{'stcOri': 'degrees',
 'sdcLAL': 'metres',
 'sdcAre': 'area',
 'sscCCo': 'ratio',
 'sscERI': 'ratio',
 'mtcWNe': 'ratio',
 'mdcAre': 'area',
 'ltcWRB': 'count',
 'sicCAR': 'ratio',
 'stcSAl': 'degree'}

In [255]:
col = "stcSAl"

In [256]:
plotting[col].describe()

count    270879.000000
mean         14.954598
std          10.620680
min           0.000087
25%           5.993527
50%          13.026315
75%          22.439168
max          44.952705
Name: stcSAl, dtype: float64

In [257]:
char_names[col]

'street alignment of ETC'

In [199]:
# normaliser = LogNorm(1, plotting[col].max(), clip=True)
# colors = apply_continuous_cmap(normaliser(plotting[col]), Oranges_9)

In [258]:
colors = apply_continuous_cmap(plotting[col], Oranges_9)

In [259]:
layer = PolygonLayer.from_geopandas(gdf=plotting, get_fill_color=colors, opacity=0.1)

In [263]:
# m = Map(layer)
# m

In [16]:
squares = np.around(
    shapely.get_coordinates(tessellation.representative_point()), decimals=-3
)

In [17]:
building_vals = pd.Series(0, index=tessellation.index)
building_vals.loc[buildings.index.values] = 1

In [18]:
r = building_vals.groupby([squares[:, 0], squares[:, 1]]).sum().reset_index()
r.columns = ["x", "y", "vals"]
r = gpd.GeoDataFrame(
    {"geometry": gpd.points_from_xy(r["x"], r["y"]), "vals": r["vals"]},
    crs=tessellation.crs,
)
r["geometry"] = r.buffer(500, cap_style=3)

In [20]:
# r.explore(column='vals', opacity=.1)

In [None]:
### bubenec is all negative - no buildings...

In [161]:
%%time
r = tessellation.dissolve(tessellation.index > 0)

CPU times: user 26.8 s, sys: 1.55 ms, total: 26.8 s
Wall time: 26.9 s


In [164]:
# r.explore(column=r.index.values.astype(int), categorial=True, prefer_canvas=True)

In [328]:
test_file_path = mm.datasets.get_path("bubenec")
df_tessellation = gpd.read_file(test_file_path, layer="tessellation")
buffer_pol = df_tessellation.dissolve().buffer(2000).to_crs(buildings.crs)

In [329]:
_, ints = buildings.geometry.sindex.query(buffer_pol, predicate="intersects")

In [330]:
ints.shape

(551,)

In [331]:
# m = buffer_pol.explore()
# m = buildings.iloc[ints].explore(color='green', m=m)
# m

In [332]:
building_region_mapping = pd.read_parquet(
    regions_datadir + "regions/" + "id_to_region.parquet", engine="pyarrow"
)
typed_dict = pd.Series(
    np.arange(building_region_mapping["id"].values.shape[0]),
    index=building_region_mapping["id"].values,
)
region_ids = building_region_mapping.groupby("region")["id"].unique()
del building_region_mapping  # its 2/3 gb
region_hulls = gpd.read_parquet(regions_datadir + "regions/" + "regions_hull.parquet")

In [333]:
from generate_buildings import read_region_buildings

region_hull = region_hull["convex_hull"]
unprocessed_buildings = read_region_buildings(
    typed_dict, region_ids, region_hull, region_id
)

TypeError: 'Polygon' object is not subscriptable

In [334]:
_, ints = unprocessed_buildings.geometry.sindex.query(
    buffer_pol, predicate="intersects"
)

In [335]:
ints.shape

(916,)

In [337]:
# m = buffer_pol.explore()
# m = unprocessed_buildings.iloc[ints].explore(color='green', m=m)
# m

In [2]:
building_units = {
    "sdbAre": "area",  # higher-larger building
    "sdbPer": "metres",  # higher - larger building
    "sdbCoA": "area",  # higher - larger holes in the polygon
    "ssbCCo": "ratio",  # higher - more like a circle
    "ssbCor": "count",  # higher - more corners
    "ssbSqu": "degrees",  # difference from min. square - lower - more squarelike
    "ssbERI": "ratio",  # higher - more rectangular
    "ssbElo": "ratio",  # higher - more elongated
    "ssbCCM": "metres",  # higher - less square-like and larger building
    "ssbCCD": "ratio",  # higher - less square-like
    "stbOri": "degrees",  # lower value- more, proper gridlike
    "mtbSWR": "ratio",  # higher - more shared walls
    "libNCo": "count-neighbourhood",  # higher more courtyards
    "ldbPWL": "metres-neighbourhood",  # higher more touching buildings
    "ltcBuA": "ratio-neigbhourhood",  # lower more clustered
    "mtbAli": "degrees",  # lower -> more gridlike
    "mtbNDi": "metres-neighbourhood",  # higher - closest building are futher apart
    "ltbIBD": "metres-neighbourhood",  # higher - neighbourhood buildings are futher apart,
    "stbCeA": "degrees",  # lower the more the teselation resembles the building,
    "stbSAl": "degrees",  # lower the more the building resembles the street, ,
}

street_units = {
    "sdsLen": "metres",
    "sssLin": "ratio",  # higher - more linear, most are linear...
    "ldsMSL": "metres-neighbourhood",  ## higher - in larger network
    "ldsRea": "count-neighbourhood",  ## higher - more tess cells(denser area)
    "ldsAre": "area-neigbourhood",  ## higher - denser area
    "sisBpM": "ratio",  ## higher - more buildings, denser are
    "sdsSPW": "metres",  ## street width, higher - wider, has a fixed max - 50
    "sdsSPO": "ratio",  ## higher - less buildings on the street
    "sdsSWD": "ratio",  ## higher  - higher - street changes width more
}

nodes_units = {
    "mtdDeg": "count",  ## higher - more streets go through the point
    "lcdMes": "ratio",  ## higher - more neigbhourhood connections
    "linP3W": "ratio",  ## higher - more 3ways
    "linP4W": "ratio",  ## higher - more 4ways
    "linPDE": "ratio",  ## higher - more deadends
    "lcnClo": "ratio",  ## higher - nodes are closer together
    "lddNDe": "ratio",  ##higher - more nodes or shorter paths
    "linWID": "ratio",  ##higher - more node degrees or shorter paths
    "ldsCDL": "metres",  ##higher - more or longer culdesacs
    "xcnSCl": "ratio",  ##higher - more gridlike connections around node
    "mtdMDi": "metres",  ## higher - node further apart from dense areas, or less neighbours...
    "sddAre": "area",  ## higher - more or larger attached tess cells
    "midRea": "count",  ## higher - denser area with more tess cells
    "midAre": "area",  ## higher - node in denser area or more tess cells.
}


enclosure_units = {
    "ldkAre": "area",
    "ldkPer": "metres",
    "lskCCo": "count",
    "lskERI": "ratio",  # higher = more rectangular
    "lskCWA": "metres",  # higher more compact or larger
    "ltkOri": "degrees",  # \\more left leaning ?
    "ltkWNB": "ratio",
    "likWBB": "ratio",
}


tess_units = {
    "stcOri": "degrees",  # higher more off-cardinal axis the polygon is
    "sdcLAL": "metres",  ## higher larger or more elongated ETC
    "sdcAre": "area",
    "sscCCo": "ratio",  # higher - more circular
    "sscERI": "ratio",  # higher - more rectangular
    "mtcWNe": "ratio",  # higher - more neighbours or smaller neigbhourhood area
    "mdcAre": "area",  # sum of tess areas in neighbourhood
    "ltcWRB": "count",  # higher - more unique enclosures around cell
    "sicCAR": "ratio",  # higher - building area more similar to tess area -> typically more tess in neighbourhood
    "stcSAl": "degrees",  # higher - follows street more
}

In [3]:
char_units = {
    **building_units,
    **street_units,
    **nodes_units,
    **tess_units,
    **enclosure_units,
}

In [4]:
char_units

{'sdbAre': 'area',
 'sdbPer': 'metres',
 'sdbCoA': 'area',
 'ssbCCo': 'ratio',
 'ssbCor': 'count',
 'ssbSqu': 'degrees',
 'ssbERI': 'ratio',
 'ssbElo': 'ratio',
 'ssbCCM': 'metres',
 'ssbCCD': 'ratio',
 'stbOri': 'degrees',
 'mtbSWR': 'ratio',
 'libNCo': 'count-neighbourhood',
 'ldbPWL': 'metres-neighbourhood',
 'ltcBuA': 'ratio-neigbhourhood',
 'mtbAli': 'degrees',
 'mtbNDi': 'metres-neighbourhood',
 'ltbIBD': 'metres-neighbourhood',
 'stbCeA': 'degrees',
 'stbSAl': 'degrees',
 'sdsLen': 'metres',
 'sssLin': 'ratio',
 'ldsMSL': 'metres-neighbourhood',
 'ldsRea': 'count-neighbourhood',
 'ldsAre': 'area-neigbourhood',
 'sisBpM': 'ratio',
 'sdsSPW': 'metres',
 'sdsSPO': 'ratio',
 'sdsSWD': 'ratio',
 'mtdDeg': 'count',
 'lcdMes': 'ratio',
 'linP3W': 'ratio',
 'linP4W': 'ratio',
 'linPDE': 'ratio',
 'lcnClo': 'ratio',
 'lddNDe': 'ratio',
 'linWID': 'ratio',
 'ldsCDL': 'metres',
 'xcnSCl': 'ratio',
 'mtdMDi': 'metres',
 'sddAre': 'area',
 'midRea': 'count',
 'midAre': 'area',
 'stcOri': 'de

In [73]:
import numpy as np
import pandas as pd

In [74]:
np.unique(list(char_units.values()))

array(['area', 'area-neigbourhood', 'count', 'count-neighbourhood',
       'degrees', 'metres', 'metres-neighbourhood', 'ratio',
       'ratio-neigbhourhood'], dtype='<U20')

In [75]:
char_units = pd.Series(char_units)

In [76]:
char_units.groupby(char_units).count()

area                     7
area-neigbourhood        1
count                    5
count-neighbourhood      2
degrees                  8
metres                   9
metres-neighbourhood     4
ratio                   24
ratio-neigbhourhood      1
dtype: int64

In [77]:
agg_chars = char_units.str.split("-").str[0]
agg_chars.groupby(agg_chars).size()

area        8
count       7
degrees     8
metres     13
ratio      25
dtype: int64

In [87]:
agg_chars[agg_chars == "area"].index.values

array(['sdbAre', 'sdbCoA', 'ldsAre', 'sddAre', 'midAre', 'sdcAre',
       'mdcAre', 'ldkAre'], dtype=object)

In [88]:
used = {c: char_names[c] for c in char_units.index}
used

{'sdbAre': 'area of building',
 'sdbPer': 'perimeter of building',
 'sdbCoA': 'courtyard area of building',
 'ssbCCo': 'circular compactness of building',
 'ssbCor': 'corners of building',
 'ssbSqu': 'squareness of building',
 'ssbERI': 'equivalent rectangular index of building',
 'ssbElo': 'elongation of building',
 'ssbCCM': 'centroid - corner mean distance of building',
 'ssbCCD': 'centroid - corner distance deviation of building',
 'stbOri': 'orientation of building',
 'mtbSWR': 'shared walls ratio of buildings',
 'libNCo': 'number of courtyards within adjacent buildings',
 'ldbPWL': 'perimeter wall length of adjacent buildings',
 'ltcBuA': 'level of building adjacency',
 'mtbAli': 'alignment of neighbouring buildings',
 'mtbNDi': 'mean distance between neighbouring buildings',
 'ltbIBD': 'mean inter-building distance',
 'stbCeA': 'cell alignment of building',
 'stbSAl': 'street alignment of building',
 'sdsLen': 'length of street segment',
 'sssLin': 'linearity of street segment',