In [None]:
# !pip install git+https://github.com/jsignell/dask-geopandas.git
# !pip install git+https://github.com/geopandas/geopandas.git

In [1]:
from dask.distributed import Client, LocalCluster
import time
import dask_geopandas as dask_geopandas
import dask.dataframe as dd
import geopandas
import pygeos
import numpy as np
import pandas as pd
import momepy
from tqdm.notebook import tqdm
from libpysal.weights import Queen

from momepy_utils import _circle_radius, get_corners, squareness, elongation, centroid_corner, solar_orientation_poly



In [2]:
# Start a local Dask cluster with 14 worker processes and connect a client to it.
cluster = LocalCluster(n_workers=14)
client = Client(cluster)
# Bare expression so the notebook renders the client summary.
client

0,1
Client  Scheduler: tcp://127.0.0.1:45727  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 14  Cores: 28  Memory: 84.28 GB


In [3]:
import warnings

warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')
warnings.filterwarnings('ignore', message='.*Assigning CRS to a GeoDataFrame without a geometry*')

In [4]:
def _centroid_corner(series):
    """Apply ``centroid_corner`` to every geometry of one partition.

    Returns a DataFrame indexed like ``series`` whose columns are the unpacked
    elements of each ``centroid_corner`` result (two columns are expected, see
    the ``meta`` passed to ``map_partitions`` below).
    """
    ccd = series.apply(lambda g: centroid_corner(g))
    return pd.DataFrame(ccd.to_list(), index=series.index)


# Process the 103 building/tessellation chunks one by one and write the
# measured characters of each chunk to its own parquet file.
for chunk_id in tqdm(range(103), total=103):
    blg = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/buildings/blg_{chunk_id}.pq")
    tess = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/tessellation/tess_{chunk_id}.pq")

    # Give the two geometry columns distinct names so both survive the merge.
    blg = blg.rename_geometry('buildings')
    tess = tess.rename_geometry('tessellation')

    # Left join: every tessellation cell is kept; cells without a matching
    # `uID` in `blg` end up with a missing building geometry.
    df = tess.merge(blg, on='uID', how='left')

    ddf = dask_geopandas.from_geopandas(df, npartitions=14)

    # ---- building characters -------------------------------------------
    ddf['sdbAre'] = ddf.buildings.area
    ddf['sdbPer'] = ddf.buildings.length
    # Area enclosed by the exterior ring minus the polygon area, i.e. the area
    # of interior holes.
    exterior_area = ddf.buildings.map_partitions(lambda series: pygeos.area(pygeos.polygons(series.exterior.values.data)), meta='float')
    ddf['sdbCoA'] = exterior_area - ddf['sdbAre']

    hull = ddf.buildings.convex_hull.exterior

    # Building area relative to the area of a circle of radius `_circle_radius`
    # computed from the convex hull coordinates. Missing buildings are skipped.
    radius = hull.apply(lambda g: _circle_radius(list(g.coords)) if g is not None else None, meta='float')
    ddf['ssbCCo'] = ddf['sdbAre'] / (np.pi * radius ** 2)

    ddf['ssbCor'] = ddf.buildings.apply(lambda g: get_corners(g), meta='float')

    ddf['ssbSqu'] = ddf.buildings.apply(lambda g: squareness(g), meta='float')

    # Minimum rotated rectangle of each building (None for missing buildings).
    bbox = ddf.buildings.apply(lambda g: g.minimum_rotated_rectangle if g is not None else None, meta=geopandas.GeoSeries())
    ddf['ssbERI'] = (ddf['sdbAre'] / bbox.area).pow(1./2) * (bbox.length / ddf['sdbPer'])

    ddf['ssbElo'] = bbox.map_partitions(lambda s: elongation(s), meta='float')

    ddf[['ssbCCM', 'ssbCCD']] = ddf.buildings.map_partitions(_centroid_corner, meta=pd.DataFrame({0: [0.1], 1: [1.1]}))

    ddf['stbOri'] = bbox.apply(lambda g: solar_orientation_poly(g), meta='float')

    # ---- tessellation cell characters ----------------------------------
    hull = ddf.tessellation.convex_hull.exterior

    # The circle radius is needed for both `sdcLAL` and `sscCCo`; compute the
    # lazy series once and reuse it instead of applying the helper twice.
    radius = hull.apply(lambda g: _circle_radius(list(g.coords)), meta='float')
    ddf['sdcLAL'] = radius * 2

    ddf['sdcAre'] = ddf.tessellation.area

    ddf['sscCCo'] = ddf['sdcAre'] / (np.pi * radius ** 2)

    bbox = ddf.tessellation.apply(lambda g: g.minimum_rotated_rectangle, meta=geopandas.GeoSeries())
    ddf['sscERI'] = (ddf['sdcAre'] / bbox.area).pow(1./2) * (bbox.length / ddf.tessellation.length)

    ddf['stcOri'] = bbox.apply(lambda g: solar_orientation_poly(g), meta='float')

    # ---- building / cell relations -------------------------------------
    ddf['sicCAR'] = ddf['sdbAre'] / ddf['sdcAre']

    # Cell alignment is the absolute *difference* between the building and the
    # cell orientation (a ratio would make the abs() pointless and blow up for
    # cell orientations near zero).
    ddf['stbCeA'] = (ddf['stbOri'] - ddf['stcOri']).abs()

    # Materialise the chunk and persist it.
    df = ddf.compute()
    df.to_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq")
    # Restart the workers between chunks and pause before the next one.
    # NOTE(review): presumably a workaround to release worker memory -- confirm.
    client.restart()
    time.sleep(5)

HBox(children=(FloatProgress(value=0.0, max=103.0), HTML(value='')))

Exception in thread AsyncProcess Dask Worker process (from Nanny) watch process join:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.7/site-packages/distributed/process.py", line 234, in _watch_process
    assert exitcode is not None
AssertionError






In [None]:
%%time
# Lazily load all enclosure chunks as one dask GeoDataFrame.
encl = dask_geopandas.read_parquet("../../urbangrammar_samba/spatial_signatures/enclosures/encl_*.pq")

encl['ldeAre'] = encl.geometry.area
encl['ldePer'] = encl.geometry.length

hull = encl.geometry.convex_hull.exterior

# Enclosure area relative to the area of a circle of radius `_circle_radius`
# computed from the convex hull coordinates; missing geometries are skipped.
radius = hull.apply(lambda g: _circle_radius(list(g.coords)) if g is not None else None, meta='float')
encl['lseCCo'] = encl['ldeAre'] / (np.pi * radius ** 2)

bbox = encl.geometry.apply(lambda g: g.minimum_rotated_rectangle if g is not None else None, meta=geopandas.GeoSeries())
encl['lseERI'] = (encl['ldeAre'] / bbox.area).pow(1./2) * (bbox.length / encl['ldePer'])

# Reuse the (None-safe) radius instead of applying `_circle_radius` a second
# time without the guard.
longest_axis = radius * 2
encl['lseCWA'] = longest_axis * ((4 / np.pi) - (16 * encl['ldeAre']) / ((encl['ldePer']) ** 2))

encl['lteOri'] = bbox.apply(lambda g: solar_orientation_poly(g), meta='float')

encl_df = encl.compute()

# make 3d geometry 2d
# `get_coordinates` returns only x/y by default; the flat coordinate array is
# split back into one chunk per geometry and each chunk is turned into a polygon.
# NOTE(review): this assumes single-ring polygons (no holes) -- confirm.
coords = pygeos.get_coordinates(encl_df.geometry.values.data)
coord_counts = pygeos.get_num_coordinates(encl_df.geometry.values.data)
# Carry over encl_df's own index: the frame is concatenated from many files, so
# its index need not be 0..n-1 and label alignment against a fresh RangeIndex
# could silently assign geometries to the wrong rows.
encl_df['geometry'] = geopandas.GeoSeries(
    [pygeos.polygons(c) for c in np.split(coords, np.cumsum(coord_counts)[:-1])],
    index=encl_df.index,
    crs=encl_df.crs,
)

# Count intersecting enclosures per row; every geometry also intersects itself,
# hence the `- 1`.
inp, res = encl_df.sindex.query_bulk(encl_df.geometry, predicate='intersects')
indices, counts = np.unique(inp, return_counts=True)
encl_df['neighbors'] = counts - 1
encl_df['lteWNB'] = encl_df['neighbors'] / encl_df['ldePer']

# Number of tessellation rows (non-null geometry) per enclosure, divided by
# enclosure area.
tess = dd.read_parquet("../../urbangrammar_samba/spatial_signatures/tessellation/tess_*.pq")
encl_counts = tess.groupby('enclosureID').count().compute()
merged = encl_df[['enclosureID', 'ldeAre']].merge(encl_counts[['geometry']], how='left', on='enclosureID')
# The left merge keeps encl_df's row order but returns a fresh RangeIndex, so
# assign by position rather than by index label.
encl_df['lieWCe'] = (merged['geometry'] / merged['ldeAre']).to_numpy()
encl_df.drop(columns='geometry').to_parquet("../../urbangrammar_samba/spatial_signatures/morphometrics/enclosures.pq")

In [5]:
# Disconnect the Dask client now that all computations are finished.
client.close()